PH Covid-19 Data Analysis

By: Marlon Teodosio

In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import requests
import matplotlib.pyplot as plt
import pandas_profiling as pp
from io import BytesIO, StringIO
In [137]:
print("Last update:", pd.to_datetime('today').strftime("%b %d, %Y %H:%M:%S"))
Last update: May 28, 2020 18:19:18
In [3]:
import plotly.express as px
import plotly.io as pio
pio.templates
import plotly.graph_objects as go
from plotly.subplots import make_subplots
pio.templates.default = 'plotly_white'

import plotly.offline as py
py.init_notebook_mode()
In [4]:
from IPython.display import HTML

HTML('''<script>
  function code_toggle() {
    if (code_shown){
      $('div.input').hide('500');
      $('#toggleButton').val('Show Code')
    } else {
      $('div.input').show('500');
      $('#toggleButton').val('Hide Code')
    }
    code_shown = !code_shown
  }

  $( document ).ready(function(){
    code_shown=false;
    $('div.input').hide()
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')
Out[4]:

Data Source

The data is extracted from the official DOH COVID-19 DataDrop.

In [5]:
#########################################################################
########################## 05 Case Information ##########################
#########################################################################

data_src = 'https://drive.google.com/open?id=1fed36fegqVEQM-h3WhskR0oCELI2zQ73' # Get shareable link from DOH Data Drop
file_id = data_src.split('=')[1]
dwn_url='https://drive.google.com/uc?export=download&id=' + file_id
url = requests.get(dwn_url).text
csv_raw = StringIO(url)
covid_ph = pd.read_csv(csv_raw, encoding = 'utf-8')

for col in covid_ph.columns:
    if 'Date' in col:
        covid_ph[col] = pd.to_datetime(covid_ph[col], dayfirst=True)

covid_ph['CityMunRes'] = covid_ph['CityMunRes'].str.replace(u\x91', 'Ñ')
covid_ph['CityMunRes'] = covid_ph['CityMunRes'].str.replace(u\x83±', 'Ñ')
covid_ph['RemovalType'] = covid_ph['RemovalType'].fillna('Active')

############################## CHECKER #################################
# covid_ph[covid_ph.RegionRes == 'NCR'].groupby('CityMunRes')['CaseCode'].count().sort_values(ascending=False).index.tolist()
In [6]:
print("Case Information Data as of:", covid_ph.DateRepConf.max().date().strftime("%b %d"))

# save the latest file
covid_ph.to_excel("./data/covid_ph_database.xlsx")
Case Information Data as of: May 26
In [7]:
########################################################################
######################### 07 Testing Aggregate #########################
########################################################################

url = 'https://drive.google.com/open?id=1cXfNiC7RoshSfW4FoBMEOB3VAu9qMrcS'
file_id = url.split('=')[1]
dwn_url = 'https://drive.google.com/uc?export=download&id=' + file_id
url = requests.get(dwn_url).text
csv_raw = StringIO(url)
df_tests = pd.read_csv(csv_raw)

# format date
df_tests['report_date'] = pd.to_datetime(df_tests['report_date'], dayfirst=True)

# change NAN & "-" to 0 (considering that these are equivalent to N/A or no value)
# df_tests['REMAINING AVAILABLE TESTS'] = df_tests['REMAINING AVAILABLE TESTS'].fillna("0")
# df_tests['remaining_available_tests'] = df_tests['remaining_available_tests'].str.replace("-", "0")

# # format count columns
# cols = [col for col in df_tests.columns if 'INDIVIDUALS' in col]
# for col in cols:
#     df_tests[col] = df_tests[col].str.replace(',', '').astype(int)
    
# df_tests['TOTAL TESTS CONDUCTED'] = df_tests['TOTAL TESTS CONDUCTED'].str.replace(',', '').astype(int)
# df_tests['remaining_available_tests'] = df_tests['remaining_available_tests'].str.replace(',', '').astype(int)
# df_tests.tail()
In [8]:
print("Facility Testing Data as of:", df_tests.report_date.max().date().strftime("%b %d"))
df_tests.to_excel("./data/testingaggregates.xlsx")
Facility Testing Data as of: May 25
In [9]:
########################################################################
################## 05 DOH Data Collect - Daily Report ##################
########################################################################

url = 'https://drive.google.com/open?id=1ocaAEwyVDU7rlYbU0D-xv13tQwEAJ5h4'
file_id = url.split('=')[1]
dwn_url='https://drive.google.com/uc?export=download&id=' + file_id
url = requests.get(dwn_url).text
csv_raw = StringIO(url)
df_hosp = pd.read_csv(csv_raw)      
df_hosp['reportdate'] = pd.to_datetime(df_hosp['reportdate'])
In [10]:
print("Hospital, Bed & MechVent Data as of:", df_hosp.reportdate.max().date().strftime("%b %d"))
Hospital, Bed & MechVent Data as of: May 25
In [11]:
# HOSPITAL - PPE
# url = 'https://drive.google.com/open?id=1b3NGhxEn59dubEnDHxf7F1taYfxkUHn1'
# file_id = url.split('=')[1]
# dwn_url='https://drive.google.com/uc?export=download&id=' + file_id
# url = requests.get(dwn_url).text
# csv_raw = StringIO(url)
# df_ppe = pd.read_csv(csv_raw)
        
# df_ppe.head(5)

Data Analysis

PH Latest Statistics

In [12]:
total_cases = len(covid_ph)
recovered = len(covid_ph.loc[covid_ph.RemovalType=='Recovered'])
died = len(covid_ph.loc[covid_ph.RemovalType=='Died'])
active_cases = total_cases - recovered - died
percent_active = active_cases / total_cases
percent_recovered = recovered / total_cases
percent_died = died / total_cases
asof = pd.to_datetime(covid_ph.DateRepConf.unique().max()) # latest available data
new = len(covid_ph[covid_ph.DateRepConf == asof])
In [13]:
cases_daily = (covid_ph.groupby('DateRepConf')['CaseCode']
               .count().resample('1D').sum())

# recoveries_daily = (covid_ph.loc[df.RemovalType=='Recovered']
#                     .groupby('DateRecover')['CaseCode'].count().resample('1D').sum())
recoveries_daily = (covid_ph.loc[covid_ph.RemovalType=='Recovered']
                    .groupby('DateRepRem')['CaseCode'].count().resample('1D').sum())

# deaths_daily = (covid_ph.loc[df.RemovalType=='Died']
#                 .groupby('DateDied')['CaseCode'].count().resample('1D').sum())
deaths_daily = (covid_ph.loc[covid_ph.RemovalType=='Died']
                .groupby('DateRepRem')['CaseCode'].count().resample('1D').sum())

cases_daily.to_excel("./data/cases_daily.xlsx")
recoveries_daily.to_excel("./data/recoveries_daily.xlsx")
deaths_daily.to_excel("./data/deaths_daily.xlsx")
In [14]:
data = pd.DataFrame(data=[[active_cases, percent_active, f"{100*percent_active:.1f}%"],
                          [died, percent_died, f"{100*percent_died:.1f}%"],
                          [recovered, percent_recovered, f"{100*percent_recovered:.1f}%"]],
                    index=['Active', 'Died', 'Recovered'], 
                    columns=['count', 'percent', 'pct']).reset_index()
data['cum_percent'] = data.percent.cumsum()
data['new'] = pd.Series([cases_daily[-1]-deaths_daily[-1]-recoveries_daily[-1], deaths_daily[-1], recoveries_daily[-1]])

# Display the Dashboard
print('\033[1m' + '\033[94m' + "PH COVID-19 Tracker" + '\033[0m')
print("As of", asof.date().strftime("%b %d, %Y"))
fig = px.bar(data, x='percent', orientation='h', color='index', 
             text='count', hover_name='pct', height=250, 
             labels={'percent': " ", 'index':'PatientType'})
fig.update_layout(legend=dict(orientation='h', x=-0.01, y=-0.1, title="", font_size=14), 
                  title=("Total Confirmed Cases: " + f"<b>{total_cases}</b>"), 
                  hovermode=False)
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=16))
for i in range(0, len(data.percent.unique())):
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=data.cum_percent.unique()[i]-0.001, y=0.6,
                            text=data.pct[i]), font_size=12,
                       font_color = px.colors.qualitative.Plotly[i])
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=data.cum_percent.unique()[i]-0.001, y=-0.7,
                            text="(+"+str(data.new[i])+")"), font_size=14,
                       font_color = px.colors.qualitative.Plotly[i])
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=0.18, y=-0.7,
                            text='New Cases: (+'+str(cases_daily[-1])+')', font_size=14))
    
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()
PH COVID-19 Tracker
As of May 26, 2020
In [130]:
covid_ph1 = covid_ph.copy()
covid_ph1['RemovalType'] = covid_ph1.RemovalType.fillna('Active')
data = covid_ph1[covid_ph1.RemovalType == 'Active'].groupby('HealthStatus').CaseCode.count().reset_index().rename({'CaseCode': 'Count'}, axis=1)
data['Percent'] = data['Count']/data['Count'].sum()*100
data['Pct'] = round(data['Percent'], 1).astype(str)+'%'
data['Cum_percent'] = data.Percent.cumsum()

# a = pd.Series(['Critical', 'Severe', 'Mild', 'Asymptomatic']).reset_index().rename({0: 'HealthStatus'}, axis=1)
a = pd.Series(['Asymptomatic', 'Mild', 'Severe', 'Critical']).reset_index().rename({0: 'HealthStatus'}, axis=1)
data1 = pd.merge(data, a, how='left', on='HealthStatus') 
data1 = data1.sort_values(by='index', ascending=False)
data1['Count_percent'] = data['Count'].astype(str) + " (" + data1['Pct'] + ")"

fig = px.bar(data1, x='Count', y='HealthStatus', orientation='h',  
             text='Count', hover_name='Pct', height=400)
fig.update_layout(title=("Health Status of Active Cases"), showlegend=False,
                  hoverlabel=dict(font_size=17, bgcolor='blue'), hovermode='y', margin=dict(l=200))
fig.update_traces(textposition='outside', textfont_size=15)
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False, title=None,
                 range=[0, data1.Count.sum()])
fig.update_yaxes(title=None, tickfont_size=15)
fig.show()
In [131]:
topcity = covid_ph[covid_ph.RegionRes == 'NCR'].groupby('CityMunRes')['CaseCode'].count().sort_values(ascending=False).index.tolist()
# topcity.remove('For Validation')
# topcity.append('For Validation')
topcity

data01 = pd.DataFrame()
for i in list(reversed(topcity)):
    df = covid_ph[covid_ph.CityMunRes == i].groupby('RemovalType').count()['CaseCode'].reset_index()
    df = df.sort_values('RemovalType')
    df['City'] = i
    df = df.rename({'CaseCode': 'Count0', 'RemovalType': 'Type'}, axis=1)
    df['Percent'] = df['Count0'] / df['Count0'].sum()*100
    df['Pct'] = round(df['Percent'], 1).astype(str)+'%'
    data01 = data01.append(df)
    
data01['Count'] = data01.Count0.astype(int).apply(lambda x : "{:,}".format(x))
cases_ncr = covid_ph[covid_ph.RegionRes == 'NCR'].shape[0]

fig = px.bar(data01, x='Percent', orientation='h', color='Type', 
             title="Total Confirmed Cases in <b>NCR</b>: " + f"<b>{cases_ncr}</b>" + " (" + str(round(cases_ncr/total_cases*100,1)) + "% of Total)",
             text='Count', hover_name='Count', height=800)
fig.update_layout(legend=dict(orientation='h', x=0.52, y=1, title="", font_size=12), hoverlabel=dict(font_size=20))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=8))

for i in range(0, len(data01.City.unique())):
    # City
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=0, y=i,
                            text=data01.City.unique()[i] + " "*15), font_size=15)
    
    # Death Rate
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='left', yanchor='middle',
                            ax=0, ay=0,
                            x=104, y=i,
                            text=data01[(data01.Type == 'Died') & (data01.City == list(reversed(topcity))[i])]['Pct'].values[0]), 
                       font_size=14, font_color = px.colors.qualitative.Plotly[1])

    # Recovery Rate
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='left', yanchor='middle',
                            ax=0, ay=0,
                            x=119, y=i,
                            text=data01[(data01.Type == 'Recovered') & (data01.City == list(reversed(topcity))[i])]['Pct'].values[0]), 
                       font_size=14, font_color = px.colors.qualitative.Plotly[2])

    # Total Confirmed Cases
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=0, y=i,
                            text="{:,}".format(data01[data01.City == list(reversed(topcity))[i]].Count0.sum())+" "*3), 
                       font_size=14)

# Death Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                        arrowhead=0, xanchor='left', yanchor='middle',
                        ax=0, ay=0,
                        x=103, y=17,
                        text="Fatality<br>Rate"), 
                   font_size=11, font_color = px.colors.qualitative.Plotly[1])

# Recovery Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                        arrowhead=0, xanchor='left', yanchor='middle',
                        ax=0, ay=0,
                        x=118, y=17,
                        text="Recovery<br>Rate"), 
                   font_size=11, font_color = px.colors.qualitative.Plotly[2])

# Total Confirmed Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=-1, y=17,
                        text="Confirmed<br>Cases"), 
                   font_size=11)

fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=8, title=None)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()
In [134]:
a1 = data01.pivot(index='City', columns='Type', values='Percent').reset_index()
a2 = data01[['City', 'Count0']].groupby('City').sum().reset_index()
data01_1 = pd.merge(a1, a2, how='left', on='City')
data01_1.rename({'Active': 'Active Rate (%)', 
                 'Died': 'Fatality Rate (%)', 
                 'Recovered': 'Recovery Rate (%)', 
                 'Count0': 'Case Count'}, axis=1, inplace=True)
data01_1['City'] = data01_1['City'].str.replace('CITY OF ', '')
data01_1['Active Rate (%)'] = round(data01_1['Active Rate (%)'], 1)
data01_1['Fatality Rate (%)'] = round(data01_1['Fatality Rate (%)'], 1)
data01_1['Recovery Rate (%)'] = round(data01_1['Recovery Rate (%)'], 1)

figgg = px.scatter(data01_1, x='Recovery Rate (%)', y='Fatality Rate (%)', 
                   title='COVID-19 Status in NCR Cities',
                   size='Case Count', text='City', size_max=40)
figgg.update_layout(hoverlabel=dict(font_size=20), font_size=11, 
                    legend=dict(orientation="v", title="", font_size=12,
                                x=0.01, y=0.01, yanchor="bottom"))
figgg.update_xaxes(range=(0, round(data01_1['Recovery Rate (%)'].max()*1.1)), titlefont_size=16)
figgg.update_yaxes(range=(0, round(data01_1['Fatality Rate (%)'].max()*1.1)), titlefont_size=16)

figgg.add_scatter(x=[round(percent_recovered*100,1)]*len(list(range(0, round(data01_1['Fatality Rate (%)'].max()*1.1)+1))), 
                  y=list(range(0, round(data01_1['Fatality Rate (%)'].max()*1.1)+1)), 
                  line=dict(color=px.colors.qualitative.Plotly[2], dash='dash'), 
                  mode='lines', name='Total PH Recovery Rate', hoverinfo='skip')

figgg.add_scatter(y=[round(percent_died*100,1)]*len(list(range(0, round(data01_1['Recovery Rate (%)'].max()*1.1)+1))), 
                  x=list(range(0, round(data01_1['Recovery Rate (%)'].max()*1.1)+1)), 
                  line=dict(color=px.colors.qualitative.Plotly[1], dash='dash'), 
                  mode='lines', name='Total PH Fatality Rate', hoverinfo='skip')

figgg.show()

Daily Cases

In [18]:
base = pd.DataFrame(covid_ph.DateRepConf.sort_values().unique()).rename({0: 'Date'}, axis=1)

c = cases_daily.reset_index().rename({'DateRepConf': 'Date', 'CaseCode': 'Count'}, axis=1)
c = pd.merge(base, c, how='left', on='Date')
c['Type'] = 'Reported Confirmed Cases'
c.Count.fillna(0)
c['Cum_count'] = c.Count.cumsum()
c['Percent_change'] = c.Count.pct_change()*100
c['%Change'] = c.Cum_count.pct_change()*100

d = deaths_daily.reset_index().rename({'DateRepRem': 'Date', 'CaseCode': 'Count'}, axis=1)
d = pd.merge(base, d, how='left', on='Date')
d['Type'] = 'Reported Deaths'
#d = deaths_daily.reset_index().rename({'DateDied': 'Date', 'CaseCode': 'Count'}, axis=1)
#d['Type'] = 'Number of Actual Deaths'
d.Count = d.Count.fillna(0)
d['Cum_count'] = d.Count.cumsum()
d['Percent_change'] = d.Count.pct_change()*100
d['%Change'] = d.Cum_count.pct_change()*100

r = recoveries_daily.reset_index().rename({'DateRepRem': 'Date', 'CaseCode': 'Count'}, axis=1) 
r = pd.merge(base, r, how='left', on='Date')
r['Type'] = 'Reported Recoveries'
#r = recoveries_daily.reset_index().rename({'DateRecover': 'Date', 'CaseCode': 'Count'}, axis=1)
#r['Type'] = 'Number of Actual Recoveries'
r.Count = r.Count.fillna(0)
r['Cum_count'] = r.Count.cumsum()
r['Percent_change'] = r.Count.pct_change()*100
r['%Change'] = r.Cum_count.pct_change()*100

import statsmodels.api as sm

cycle, trend1 = sm.tsa.filters.hpfilter(c['Count'], 7)
c['trend_HP'] = trend1
cycle, trend2 = sm.tsa.filters.hpfilter(d['Count'], 7)
d['trend_HP'] = trend2
cycle, trend3 = sm.tsa.filters.hpfilter(r['Count'], 7)
r['trend_HP'] = trend3

c['trend_7MA'] = c['Count'].rolling(7).mean()
d['trend_7MA'] = d['Count'].rolling(7).mean()
r['trend_7MA'] = r['Count'].rolling(7).mean()

c['trend_7MA'] = c['trend_7MA'].fillna(" ")

dr = pd.concat([r,d])

dr['trend_7MA_name'] = 'Trend (7-day MA)'
dr['trend_HP_name'] = 'Trend (HP Filter)'

d['trend_7MA_name'] = 'Trend (7-day MA)'
d['trend_HP_name'] = 'Trend (HP Filter)'
r['trend_7MA_name'] = 'Trend (7-day MA)'
r['trend_HP_name'] = 'Trend (HP Filter)'
c['trend_7MA_name'] = 'Trend (7-day MA)'
c['trend_HP_name'] = 'Trend (HP Filter)'
In [19]:
fig1 = px.bar(c, x='Date', y='Count', color='Type', 
              color_discrete_sequence=["lightblue"])

fig2 = px.line(c, x='Date', y='trend_7MA', color='trend_7MA_name', line_shape='spline', 
               color_discrete_sequence=['blue'])
# fig3 = px.line(c, x='Date', y='trend_HP', color='trend_HP_name', line_shape='spline', 
#                color_discrete_sequence=['lightblue'])

fig1.add_trace(fig2.data[0])
# fig1.add_trace(fig3.data[0].update(line={'dash': 'dash'}))
fig1.update_yaxes(title="Case Count", tickfont_size=10)
fig1.update_layout(title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
                   xaxis=dict(tickmode='array', dtick=0, 
                              tickfont_size=9, title="Date"),
                   legend=dict(orientation="v", title="",
                               x=0.05, y=0.75, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

fig1.show()
In [20]:
fig1 = px.bar(d, x='Date', y='Count', color='Type', 
              color_discrete_sequence=["lightpink"])

fig2 = px.line(d, x='Date', y='trend_7MA', color='trend_7MA_name', line_shape='spline', 
               color_discrete_sequence=['tomato'])
# fig3 = px.line(c, x='Date', y='trend_HP', color='trend_HP_name', line_shape='spline', 
#                color_discrete_sequence=['pink'])

fig1.add_trace(fig2.data[0])
# fig1.add_trace(fig3.data[0].update(line={'dash': 'dash'}))
fig1.update_yaxes(title="Case Count", tickfont_size=10)
fig1.update_layout(title=dict(x=0.5, text="PH Covid-19 Daily Death Cases"),
                   xaxis=dict(tickmode='array', dtick=0, 
                              tickfont_size=9, title="Date"),
                   legend=dict(orientation="v", title="",
                               x=0.05, y=0.75, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

fig1.show()
In [21]:
fig1 = px.bar(r, x='Date', y='Count', color='Type', 
              color_discrete_sequence=["lightgreen"])

fig2 = px.line(r, x='Date', y='trend_7MA', color='trend_7MA_name', line_shape='spline', 
               color_discrete_sequence=['green'])
# fig3 = px.line(c, x='Date', y='trend_HP', color='trend_HP_name', line_shape='spline', 
#                color_discrete_sequence=['pink'])

fig1.add_trace(fig2.data[0])
# fig1.add_trace(fig3.data[0].update(line={'dash': 'dash'}))
fig1.update_yaxes(title="Case Count", tickfont_size=10)
fig1.update_layout(title=dict(x=0.5, text="PH Covid-19 Daily Recovered Cases"),
                   xaxis=dict(tickmode='array', dtick=0, 
                              tickfont_size=9, title="Date"),
                   legend=dict(orientation="v", title="",
                               x=0.05, y=0.75, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

fig1.show()

Cumulative Cases

In [22]:
rc = np.maximum(cases_daily - recoveries_daily  - deaths_daily, 0).fillna(0).reset_index()
rc.rename({'CaseCode': 'Count', 'index': 'Date'}, axis=1, inplace=True)
rc['Cum_count'] = rc.Count.cumsum()
rc['Type'] = 'Reported Remaining Active Cases'

cc = cases_daily.reset_index()
cc.rename({'CaseCode': 'Count', 'DateRepConf': 'Date'}, axis=1, inplace=True)
cc['Cum_count'] = cc.Count.cumsum()
cc['Type'] = 'Reported Confirmed Cases'

# Figure 1
fig1 = px.bar(cc, x='Date', y='Cum_count', color='Type',
              color_discrete_sequence=['lightblue'])

# Figure 1.1
fig2 = px.line(rc, x='Date', y='Cum_count', color='Type',
               color_discrete_sequence=["blue"])

fig1.add_trace(fig2.data[0])
fig1.update_yaxes(title="Case Count", tickfont_size=10)
fig1.update_layout(title=dict(x=0.5, text="PH Covid-19 Cumulative Number of Cases"),
                   xaxis=dict(tickmode='array', dtick=0, tickfont_size=9, title="Date"),
                   legend=dict(orientation="v", title="",
                               x=0.05, y=0.8, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20), hovermode="x")
fig1.show()

# Figure 2
fig2 = px.line(dr, x='Date', y='Cum_count', color='Type', line_shape='spline',
               color_discrete_sequence=['limegreen', 'tomato'])

fig2.update_yaxes(title="Case Count", showgrid=True, tickfont_size=10)

fig2.update_layout(title_text="PH Covid-19 Cumulative Number of Cases")
fig2.update_layout(title=dict(x=0.5),
                  xaxis=dict(tickmode='array', showgrid=False, #tick0=0,
                             dtick=0, tickfont_size=9, title="Date"),
                  legend=dict(orientation="v", title="",
                              x=0.05, y=0.8, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20), hovermode='x')

fig2.show()
Cumulative Cases In Log Scale
In [22]:
cc_ecq = cc[cc.Date >= '2020-03-15'].reset_index(drop=True).reset_index()
dr_ecq = dr[dr.Date >= '2020-03-15']

# Figure 1
fig1 = px.line(cc_ecq, x='Date', y='Cum_count', color='Type', log_y=True,
              color_discrete_sequence=['blue'])
fig1.update_yaxes(title="log(Case Count)", tickfont_size=10)
fig1.update_layout(title=dict(x=0.5, text="PH Covid-19 Cumulative Number of Cases (in Log Scale)"),
                   xaxis=dict(tickmode='array', showgrid=False, dtick=0, tickfont_size=9, title="Day"),
                   legend=dict(orientation="v", title="",
                               x=0.05, y=0.8, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20), hovermode="x")
# fig1.update_layout(shapes=[dict(type= 'line', 
#                                 yref= 'y', y0=1, y1=10000, 
#                                 xref= 'x', x0=1, x1=50, 
#                                 line=dict(color="green", width=1, dash="dashdot"))])
fig1.show()

# Figure 2
fig2 = px.line(dr_ecq, x='Date', y='Cum_count', 
               color='Type', 
               line_shape='spline', log_y = True,
               color_discrete_sequence=['limegreen', 'tomato'])

fig2.update_yaxes(title="log(Case Count)", showgrid=True, tickfont_size=10)
fig2.update_layout(title_text="PH Covid-19 Cumulative Number of Cases (in Log Scale)")
fig2.update_layout(title=dict(x=0.5),
                  xaxis=dict(tickmode='array', showgrid=False, #tick0=0,
                             dtick=0, tickfont_size=9, title="Date"),
                  legend=dict(orientation="v", title="",
                              x=0.05, y=0.8, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20), hovermode='x')

fig2.show()

print("Note: Day 0 is March 15 when ECQ was implemented & hit 140 confirmed cases.")
Note: Day 0 is March 15 when ECQ was implemented & hit 140 confirmed cases.
In [23]:
# # Create figure with secondary y-axis
# fig = make_subplots(specs=[[{"secondary_y": True}]])

# # Figure 1
# fig1 = px.line(dr, x='Date', y='Cum_count', color='Type', line_shape='spline',
#                color_discrete_sequence=['limegreen', 'tomato'])

# # Figure 2
# fig2 = px.bar(c, x='Date', y='Cum_count', color='Type', 
#               color_discrete_sequence=["lightblue"])

# fig.add_trace(fig2.data[0], secondary_y=False)
# fig.add_trace(fig1.data[0], secondary_y=True)
# fig.add_trace(fig1.data[1], secondary_y=True)

# fig.update_yaxes(range=[0, dr.Cum_count.max()*1.5], 
#                  title="Deaths / Recoveries", tickfont_size=10, secondary_y=True)
# fig.update_yaxes(title="Active Cases", showgrid=False, tickfont_size=10, secondary_y=False)

# fig.update_layout(title_text="PH Covid-19 Cumulative Number of Cases")
# fig.update_layout(title=dict(x=0.5),
#                   xaxis=dict(tickmode='linear', #tick0=0, 
#                              dtick=0, tickfont_size=8, title="Date"),
#                   legend=dict(orientation="v", title="",
#                               x=0.02, y=0.7, yanchor="bottom"), 
#                   hovermode='x')

# fig.show()

Percent Change on Daily Cases

In [24]:
# Compute trend based on Hodrick-Prescott Filter
import statsmodels.api as sm

c1 = c[c.Date >= '2020-03-15']
d1 = d[d.Date >= '2020-03-15']
r1 = r[r.Date >= '2020-03-15']

c1 = c1.reset_index(drop=True)
cycle, trend = sm.tsa.filters.hpfilter(c1['%Change'], 7)
c1['trend'] = trend

d1 = d1.reset_index(drop=True)
cycle, trend = sm.tsa.filters.hpfilter(d1['%Change'], 7)
d1['trend'] = trend

r1 = r1.reset_index(drop=True)
cycle, trend = sm.tsa.filters.hpfilter(r1['%Change'], 7)
r1['trend'] = trend
In [25]:
print('-------------------------------- Confirmed Cases --------------------------------')
print('1-day Percent Growth:', str(round(c1['%Change'].tail(1).values[0],1))+'%')
print('7-day Average Percent Growth:', str(round(c1['%Change'].tail(7).mean(),1))+'%')
print('30-day Average Percent Growth:', str(round(c1['%Change'].tail(30).mean(),1))+'%')
-------------------------------- Confirmed Cases --------------------------------
1-day Percent Growth: 2.4%
7-day Average Percent Growth: 1.8%
30-day Average Percent Growth: 2.2%
In [26]:
# Figure 1
c1['trend_name'] = 'Trend (HP Filter)'
fig_trend = px.line(c1, x='Date', y='trend', color='trend_name', 
                    color_discrete_sequence=["cadetblue"])

fig2 = px.line(c1, x='Date', y='%Change', color='Type',
               color_discrete_sequence=["cadetblue"])
fig2.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}))
fig2.update_yaxes(title="%Change", showgrid=False, tickfont_size=10)
fig2.update_layout(title=dict(x=0.5, text="%Change on Daily Confirmed Cases"),
                   xaxis=dict(tickmode='array', #tick0=4, dtick=0, 
                              tickfont_size=9, title="Date", showgrid=False),
                   legend=dict(orientation="v", title="",
                              x=0.75, y=0.85, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

# fig2.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
#                          xanchor='left', yanchor='middle', ax=0, ay=0,
#                          x=pd.to_datetime(c1.Date.unique().max()), 
#                          y=round(c1['trend'][-1:].values[0],1),
#                          text=" "*2 + round(c1['trend'][-1:].values[0],1).astype('str')+'%', 
#                          font_size=14, font_color='cadetblue'))

fig2.show()
In [27]:
print('--------------------------------Reported Deaths--------------------------------')
print('1-day Percent Growth:', str(round(d1['%Change'].tail(1).values[0],1))+'%')
print('7-day Average Percent Growth:', str(round(d1['%Change'].tail(7).mean(),1))+'%')
print('30-day Average Percent Growth:', str(round(d1['%Change'].tail(30).mean(),1))+'%')
--------------------------------Reported Deaths--------------------------------
1-day Percent Growth: 1.5%
7-day Average Percent Growth: 0.8%
30-day Average Percent Growth: 1.9%
In [28]:
# Figure 2
d1['trend_name'] = 'Trend (HP Filter)'
fig_trend = px.line(d1, x='Date', y='trend', color='trend_name', color_discrete_sequence=["tomato"])

fig2 = px.line(d1, x='Date', y='%Change', color='Type', 
               color_discrete_sequence=["tomato"])
fig2.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}))
fig2.update_yaxes(title="%Change", showgrid=False, tickfont_size=10)
fig2.update_layout(title=dict(x=0.5, text="%Change on Daily Reported Deaths"),
                   xaxis=dict(tickmode='array', #tick0=4, dtick=0, 
                              tickfont_size=9, title="Date", showgrid=False),
                   legend=dict(orientation="v", title="",
                              x=0.75, y=0.85, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

# fig2.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
#                          xanchor='left', yanchor='middle', ax=0, ay=0,
#                          x=pd.to_datetime(d1.Date.unique().max()), 
#                          y=round(d1['trend'][-1:].values[0],1),
#                          text=" "*2 + round(d1['trend'][-1:].values[0],1).astype('str')+'%', 
#                          font_size=14, font_color='tomato'))

fig2.show()
In [29]:
print('-------------------------------- Reported Recoveries --------------------------------')
print('1-day Percent Growth:', str(round(r1['%Change'].tail(1).values[0],1))+'%')
print('7-day Average Percent Growth:', str(round(r1['%Change'].tail(7).mean(),1))+'%')
print('30-day Average Percent Growth:', str(round(r1['%Change'].tail(30).mean(),1))+'%')
-------------------------------- Reported Recoveries --------------------------------
1-day Percent Growth: 2.7%
7-day Average Percent Growth: 2.6%
30-day Average Percent Growth: 4.7%
In [30]:
# Figure 3
r1['trend_name'] = 'Trend (HP Filter)'
fig_trend = px.line(r1, x='Date', y='trend', color='trend_name', color_discrete_sequence=["limegreen"])

fig2 = px.line(r1, x='Date', y='%Change', color='Type', 
               color_discrete_sequence=["limegreen"])
fig2.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}))
fig2.update_yaxes(title="%Change", showgrid=False, tickfont_size=10)
fig2.update_layout(title=dict(x=0.5, text="%Change on Daily Reported Recoveries"),
                   xaxis=dict(tickmode='array', #tick0=4, dtick=0, 
                              tickfont_size=9, title="Date", showgrid=False),
                   legend=dict(orientation="v", title="",
                              x=0.75, y=0.85, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

# fig2.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
#                          xanchor='left', yanchor='middle', ax=0, ay=0,
#                          x=pd.to_datetime(d1.Date.unique().max()), 
#                          y=round(r1['trend'][-1:].values[0],1),
#                          text=" "*2 + round(r1['trend'][-1:].values[0],1).astype('str')+'%', 
#                          font_size=14, font_color='limegreen'))
fig2.show()

Active-Death-Recovery Trend

In [31]:
active_daily = np.maximum(cases_daily - recoveries_daily  - deaths_daily, 0).fillna(0)

active_rate_daily = (active_daily.cumsum() / cases_daily.cumsum())*100
death_rate_daily = (deaths_daily.cumsum() / cases_daily.cumsum())*100
recovery_rate_daily = (recoveries_daily.cumsum() / cases_daily.cumsum())*100
In [32]:
# a1 = active_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
# a1['Type'] = 'Active Rate'
# d1 = death_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
# d1['Type'] = 'Death Rate'
# r1 = recovery_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
# r1['Type'] = 'Recovery Rate'

# adr1 = pd.concat([a1, d1, r1])

# # March 16 - start of ECQ
# a2 = a1[46:]
# d2 = d1[46:]
# r2 = r1[46:]
# adr2 = pd.concat([a2, d2, r2])

# fig = px.bar(adr2, x='Date', y='Rate (%)', color='Type')
# fig.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False)
# fig.update_yaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False)

# #fig.update_layout(title_text="PH Covid-19 Cumulative Number of Cases")
# fig.update_layout(title=dict(x=0.5),
#                   xaxis=dict(tickmode='linear', #tick0=0, 
#                              dtick=0, tickfont_size=8, title="Date"),
#                   legend=dict(orientation="h", title=" ",
#                               x=-0.02, y=1, yanchor="bottom"))
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
#                         xanchor='left', yanchor='bottom', ax=0, ay=20,
#                         x=pd.to_datetime(adr2.Date.unique().min()), y=0,
#                         text='Start of ECQ'), font_size=11)
# fig.show()
In [33]:
aaa1 = active_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
aaa1['Type'] = 'Active Rate'
ddd1 = death_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
ddd1['Type'] = 'Death Rate'
rrr1 = recovery_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
rrr1['Type'] = 'Recovery Rate'

adr1 = pd.concat([aaa1, ddd1, rrr1])

# March 16 - start of ECQ
aaa2 = aaa1[46:]
ddd2 = ddd1[46:]
rrr2 = rrr1[46:]
adr2 = pd.concat([aaa2, ddd2, rrr2])

fig = px.line(adr2, x='Date', y='Rate (%)', color='Type')
fig.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=True, showgrid=True, zeroline=False, showline=False)

# fig.update_layout(title_text="PH Covid-19 Cumulative Number of Cases")
fig.update_layout(title=dict(x=0.5), showlegend=True,
                  xaxis=dict(tickmode='array', #tick0=0, 
                             dtick=0, tickfont_size=8, title="Date"),
                  yaxis=dict(tickfont_size=10),
                  legend=dict(orientation="h", title=" ",
                              x=-0.02, y=1, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
                        xanchor='left', yanchor='bottom', ax=0, ay=20,
                        x=pd.to_datetime(adr2.Date.unique().min()), y=0,
                        text='Start of ECQ'), font_size=11)
# Latest Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
                        xanchor='left', yanchor='middle', ax=0, ay=0,
                        x=pd.to_datetime(adr2.Date.unique().max()), 
                        y=aaa2['Rate (%)'][-1:].values[0],
                        text=" "*2 + round(aaa2['Rate (%)'][-1:].values[0],1).astype('str')+'%', 
                        font_size=15, font_color=px.colors.qualitative.Plotly[0]))
fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
                        xanchor='left', yanchor='middle', ax=0, ay=0,
                        x=pd.to_datetime(adr2.Date.unique().max()), 
                        y=ddd2['Rate (%)'][-1:].values[0],
                        text=" "*2 + round(ddd2['Rate (%)'][-1:].values[0],1).astype('str')+'%', 
                        font_size=15, font_color=px.colors.qualitative.Plotly[1]))
fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
                        xanchor='left', yanchor='middle', ax=0, ay=0,
                        x=pd.to_datetime(adr2.Date.unique().max()), 
                        y=rrr2['Rate (%)'][-1:].values[0],
                        text=" "*2 + round(rrr2['Rate (%)'][-1:].values[0],1).astype('str')+'%', 
                        font_size=15, font_color=px.colors.qualitative.Plotly[2]))

# Legend name
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
#                         xanchor='right', yanchor='middle', ax=0, ay=0,
#                         x=pd.to_datetime(adr2.Date.unique().min()), 
#                         y=a2['Rate (%)'][:1].values[0],
#                         text="Active Rate"+" ",
#                         font_size=14))
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
#                         xanchor='right', yanchor='middle', ax=0, ay=0,
#                         x=pd.to_datetime(adr2.Date.unique().min()), 
#                         y=d2['Rate (%)'][:1].values[0],
#                         text="Death Rate"+" ", 
#                         font_size=14))
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
#                         xanchor='right', yanchor='middle', ax=0, ay=0,
#                         x=pd.to_datetime(adr2.Date.unique().min()), 
#                         y=r2['Rate (%)'][:1].values[0],
#                         text="Recovery Rate"+" ", 
#                         font_size=14))

fig.show()

Patients Profile

Sex

In [34]:
# Figure 0
data = covid_ph.groupby('Sex')['CaseCode'].count().reset_index().sort_values('CaseCode', ascending=False).rename({'CaseCode':'Count'}, axis=1).reset_index(drop=True)
data['Percent'] = (data.Count / data.Count.sum())*100
data['Pct'] = round(data['Percent'],1).astype('str') + '%'

fig = px.bar(data, x='Percent', orientation='h', color='Sex', 
             text='Count', hover_name='Pct', height=250, labels={'Percent': ''})
fig.update_layout(legend=dict(orientation='h', title=""), hovermode=False, font_size=14,
                  title=("Total Confirmed Cases: " + f"<b>{total_cases}</b>"))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=20))
#for i in range(0, len(data.Percent.unique())):
for i in [0, 1]:
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=data.Percent.cumsum().unique()[i]-0.001, y=0.6,
                            text=data.Pct[i]), font_size=16,
                       font_color = px.colors.qualitative.Plotly[i])
    
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()

# Figure 1 & 2
data1 = covid_ph.groupby(['Sex','DateRepConf'])['CaseCode'].count().reset_index().rename({'CaseCode':'Count'}, axis=1)

date_f = active_daily.index.to_frame().reset_index(drop='True').rename({0: 'DateRepConf'}, axis=1)
date_f['Sex'] = 'Female'
data_f = pd.merge(date_f, data1[data1.Sex == 'Female'].iloc[:,1:], how='left', on='DateRepConf').fillna(0)
data_f['Cum_count'] = data_f.Count.cumsum()
date_m = active_daily.index.to_frame().reset_index(drop='True').rename({0: 'DateRepConf'}, axis=1)
date_m['Sex'] = 'Male'
data_m = pd.merge(date_m, data1[data1.Sex == 'Male'].iloc[:,1:], how='left', on='DateRepConf').fillna(0)
data_m['Cum_count'] = data_m.Count.cumsum()

data2 = pd.concat([data_m, data_f]).reset_index(drop=True)
data2['Percent'] = data2.groupby('DateRepConf')['Cum_count'].apply(lambda x: 100 * x / float(x.sum())).values
data3 = data2[data2.DateRepConf > '2020-03-15'] # Filter start ECQ

fig1 = px.line(data3, x='DateRepConf', y='Cum_count', color='Sex')
fig2 = px.bar(data3, x='DateRepConf', y='Percent', color='Sex')

fig000 = make_subplots(rows=1, cols=2, subplot_titles=("Cumulative Count by Sex", "Cumulative Sex Ratio"))
for loop in range(0, len(data.Sex.unique())):
    fig000.add_trace(fig1.data[loop], row=1, col=1)
    fig000.add_trace(fig2.data[loop], row=1, col=2)
    fig000.update_traces(showlegend=True, row=1, col=1)
    fig000.update_traces(showlegend=False, row=1, col=2)
fig000.update_layout(barmode='stack', 
                     xaxis1=dict(tickmode='array', dtick=0, tickfont_size=8, title="Date"),
                     xaxis2=dict(tickmode='array', dtick=0, tickfont_size=8, title="Date"), 
                     yaxis1=dict(tickfont_size=8), yaxis2=dict(tickfont_size=8))
# fig000.update_xaxes(showticklabels=False, showgrid=False, 
#                     zeroline=False, showline=False)
# fig000.update_yaxes(showticklabels=False, showgrid=False, 
#                     zeroline=False, showline=False, row=1, col=2)
fig000.update_layout(title=dict(x=0.5), showlegend=False, height=400,
                     legend=dict(orientation="h", x=-0.05, y=-0.4,
                                 yanchor="bottom"), hoverlabel=dict(font_size=20))
    
fig000.show()


# Figure 3 & 4
data2 = pd.concat([data_m, data_f]).reset_index(drop=True)
data2['Percent'] = data2.groupby('DateRepConf')['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data3 = data2[data2.DateRepConf > '2020-03-15'] # Filter start ECQ

fig1 = px.line(data3, x='DateRepConf', y='Count', color='Sex')
fig2 = px.bar(data3, x='DateRepConf', y='Percent', color='Sex')

fig000 = make_subplots(rows=1, cols=2, subplot_titles=("Daily Case Count by Sex", "Daily Case Count Sex Ratio"))
for loop in range(0, len(data.Sex.unique())):
    fig000.add_trace(fig1.data[loop], row=1, col=1)
    fig000.add_trace(fig2.data[loop], row=1, col=2)
    fig000.update_traces(showlegend=True, row=1, col=1)
    fig000.update_traces(showlegend=False, row=1, col=2)
fig000.update_layout(barmode='stack', 
                     xaxis1=dict(tickmode='array', dtick=0, tickfont_size=8, title="Date"),
                     xaxis2=dict(tickmode='array', dtick=0, tickfont_size=8, title="Date"), 
                     yaxis1=dict(tickfont_size=8), yaxis2=dict(tickfont_size=8))
# fig000.update_xaxes(showticklabels=False, showgrid=False, 
#                     zeroline=False, showline=False)
# fig000.update_yaxes(showticklabels=False, showgrid=False, 
#                     zeroline=False, showline=False, row=1, col=2)
fig000.update_layout(title=dict(x=0.5), showlegend=False, height=400,
                     legend=dict(orientation="h", x=-0.05, y=-0.4,
                                 yanchor="bottom"), hoverlabel=dict(font_size=20))
    
fig000.show()

print("Note: There are", covid_ph.Sex.isnull().sum(), "cases with no sex information")
Note: There are 0 cases with no sex information
In [35]:
# Figure 01
data01 = covid_ph.groupby(['Sex','RemovalType'])['CaseCode'].count().reset_index().sort_values('CaseCode', ascending=False).rename({'CaseCode':'Count'}, axis=1)
data01['Percent'] = data01.groupby('RemovalType')['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data01['Pct'] = round(data01['Percent'],1).astype('str') + '%'

fig = px.bar(data01, x='Percent', orientation='h', color='Sex', 
             text='Count', hover_name='Pct', hover_data=None, height=350)
fig.update_layout(legend=dict(orientation='h', x=0.09, y=0, title=""), hoverlabel=dict(font_size=20))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=18))
for i in range(0, len(data01.RemovalType.unique())):
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=0, y=i,
                            text=data01.RemovalType.unique()[i] + " "), font_size=15)
    
fig.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=8, title=None)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()

Age

In [36]:
# df = covid_ph.fillna("no info")
age = covid_ph.groupby(['AgeGroup','Sex'])['CaseCode'].count().reset_index().rename({'CaseCode': 'Count'},axis=1)
age['AgeGroup'] = age.AgeGroup.str.replace('5 to 9', '05 to 09', case=True, regex=False)
age1 = age.sort_values(['Sex','AgeGroup'], ascending=[False, True]).reset_index(drop='True')
fig1 = px.bar(age1, x='AgeGroup', y='Count', color='Sex', barmode='group', 
              title='Age Group Distribution by Sex')
age2 = age1.groupby('AgeGroup').sum().reset_index()
age2['Total'] = 'Total Cases'

fig2 = px.line(age2, x='AgeGroup', y='Count', color='Total', line_shape='spline')
fig2.data[0].update(mode='markers+lines')
fig2.update_traces(line_color='lightgreen', showlegend=True, text="Count")
fig1.add_trace(fig2.data[0])

fig1.update_layout(legend=dict(orientation="h", title="",
                               x=0.65, y=0.9,
                               yanchor="bottom"), hoverlabel=dict(font_size=20))

fig1.show()

pcnt = round((covid_ph.AgeGroup.isnull().sum() / covid_ph.shape[0])*100,2).astype(str) + "%"
print("*Note: There are", covid_ph.AgeGroup.isnull().sum(), "(", pcnt,")", "cases with no age information")
*Note: There are 74 ( 0.5% ) cases with no age information
In [37]:
# df = covid_ph.copy()
# df['RemovalType'] = df.RemovalType.fillna('Active')
# age_type = df.groupby(['AgeGroup','RemovalType'])['CaseCode'].count().reset_index().rename({'CaseCode': 'Count'}, axis=1)
# age_type['AgeGroup'] = age_type.AgeGroup.str.replace('5 to 9', '05 to 09', case=True, regex=False)
# age_type1 = age_type.sort_values(['AgeGroup', 'RemovalType'], ascending=True).reset_index(drop='True')
# age_type1['Percent'] = age_type1.groupby(['RemovalType'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
# age_type1

# age_sex_type = df.groupby(['AgeGroup', 'Sex', 'RemovalType'])['CaseCode'].count().reset_index().rename({'CaseCode': 'Count'}, axis=1)
# age_sex_type['AgeGroup'] = age_sex_type.AgeGroup.str.replace('5 to 9', '05 to 09', case=True, regex=False)
# age_sex_type1 = age_sex_type.sort_values(['AgeGroup', 'Sex', 'RemovalType'], ascending=True).reset_index(drop='True')
# age_sex_type1['Percent'] = age_sex_type1.groupby(['Sex','RemovalType'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
# age_sex_type1.sort_values(['RemovalType','AgeGroup','Sex'], ascending=[True, True, False], inplace=True)

# fig1 = px.line(age_type1, x='AgeGroup', y='Percent', color='RemovalType',
#                line_shape="spline", hover_name='Count',
#                labels={'RemovalType': "PatientType"}, 
#               title='Age Group Distribution by Patient Type')
# # fig1 = px.bar(age_type1, x='AgeGroup', y='Percent', color='RemovalType', 
# #               barmode='group', labels={'RemovalType': " "}, 
# #               title='Age Distribution by Patient Type')
# for i in [0,1,2]:
#     fig1.data[i].update(mode='markers+lines')
# fig1.update_layout(legend=dict(orientation="h", title="",
#                                x=0.03, y=0.9,
#                                yanchor="bottom"), hovermode='x')
# fig1.show()

# # fig000 = make_subplots(rows=1, cols=2, subplot_titles=("Male", "Female"))
# # figM = px.bar(age_sex_type1[age_sex_type1.Sex == 'Male'], 
# #               x='AgeGroup', y='Percent', color='RemovalType', 
# #               barmode='group', labels={'RemovalType': " "})
# # figF = px.bar(age_sex_type1[age_sex_type1.Sex == 'Female'], 
# #               x='AgeGroup', y='Percent', color='RemovalType', 
# #               barmode='group', labels={'RemovalType': " "})
# # for loop in range(0, len(age_sex_type1.RemovalType.unique())):
# #     fig000.add_trace(figM.data[loop], row=1, col=1)
# #     fig000.add_trace(figF.data[loop], row=1, col=2)
# #     fig000.update_traces(showlegend=True, row=1, col=1)
# #     fig000.update_traces(showlegend=False, row=1, col=2)
# #     fig000.update_layout(legend=dict(orientation="h", title="",
# #                                      x=-0.05, y=-0.5,
# #                                      yanchor="bottom"), height=400)
# # fig000.show()
In [38]:
df_recovered = covid_ph.loc[(covid_ph.RemovalType=='Recovered')].reset_index(drop=True)
df_died = covid_ph.loc[(covid_ph.RemovalType=='Died')].reset_index(drop=True)

df = pd.concat([df_recovered, df_died])
fig = px.histogram(df, x='Age', color='RemovalType', opacity=0.8, nbins=20,
                   marginal='box', barmode='overlay', 
                   labels={"RemovalType": "PatientType"}, 
                   title='Age Distribution of Recovered vs Died Patients')
fig.update_layout(legend=dict(orientation="h", title="", 
                              x=0.03, y=0.65, yanchor="bottom"),
                  xaxis=dict(tickmode='linear', dtick=10))

fig.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_recovered.Age.median()+1, y=0.86,
                        text=int(df_recovered.Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[0]))
fig.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_died.Age.median()+1, y=0.99,
                        text=int(df_died.Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[1]))

fig.show()
In [39]:
df_recovered = covid_ph.loc[(covid_ph.RemovalType=='Recovered')].reset_index(drop=True)
df_died = covid_ph.loc[(covid_ph.RemovalType=='Died')].reset_index(drop=True)

df = pd.concat([df_recovered, df_died])

# Recovered Patients
fig1 = px.histogram(df_recovered, x='Age', color='Sex', opacity=0.8, 
                   marginal='box', barmode='overlay', 
                   labels={"RemovalType": "PatientType"}, 
                   title='Age Distribution of Recovered Patients')
fig1.update_layout(legend=dict(orientation="h", title="", x=0.03, y=0.65, yanchor="bottom"),                   
                  xaxis=dict(tickmode='linear', dtick=10))

fig1.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_recovered[df_recovered.Sex == 'Male'].Age.median()+1, y=0.86,
                        text=int(df_recovered[df_recovered.Sex == 'Male'].Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[0]))
fig1.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_recovered[df_recovered.Sex == 'Female'].Age.median()+1, y=0.99,
                        text=int(df_recovered[df_recovered.Sex == 'Female'].Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[1]))
fig1.show()

fig2 = px.histogram(df_died, x='Age', color='Sex', opacity=0.8, 
                   marginal='box', barmode='overlay', 
                   labels={"RemovalType": "PatientType"}, 
                   title='Age Distribution of Died Patients')
fig2.update_layout(legend=dict(orientation="h", title="", x=0.03, y=0.65, yanchor="bottom"),
                   xaxis=dict(tickmode='linear', dtick=10))

fig2.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_died[df_died.Sex == 'Male'].Age.median()+1, y=0.86,
                        text=int(df_died[df_died.Sex == 'Male'].Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[0]))
fig2.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_died[df_died.Sex == 'Female'].Age.median()+1, y=0.99,
                        text=int(df_died[df_died.Sex == 'Female'].Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[1]))
fig2.show()

Residence

In [40]:
df_regres = covid_ph.groupby('ProvRes').count()['CaseCode'].sort_values(ascending=False).reset_index()
df_regres['PH'] = 'Philippines'
df_regres.rename({'CaseCode': 'Count'}, inplace=True, axis=1)
df_regres['Percentage'] = round((df_regres.Count / df_regres.Count.sum())*100,1).astype('str')+'%' 
df_regres = df_regres[df_regres.ProvRes != 'For Validation']

df_regres1 = covid_ph.groupby(['ProvRes', 'CityMunRes']).count()['CaseCode'].sort_values(ascending=False).reset_index()
df_regres1['PH'] = 'Philippines'
df_regres1.rename({'CaseCode': 'Count'}, inplace=True, axis=1)
df_regres1['Percentage'] = round((df_regres1.Count / df_regres1.Count.sum())*100,1).astype('str')+'%' 
df_regres1 = df_regres1[df_regres1.ProvRes != 'For Validation']
df_regres1 = df_regres1[df_regres1.CityMunRes != 'For Validation']
In [41]:
top = 10
fig1 = px.bar(df_regres.head(top), x="Count", y="ProvRes",
             orientation='h', hover_name='Percentage',
             text="Count",
             title="Provinces with Most Cases")
fig1.update_traces(textposition='outside', textfont_size=12)
fig1.update_layout(title=dict(x=0.5), yaxis=dict(categoryorder='total ascending'), 
                   showlegend = False, legend_orientation="v")
fig2 = px.bar(df_regres1.head(top), x="Count", y="CityMunRes",
             orientation='h', hover_name='Percentage',
             text="Count",
             title="City/Municipalities with Most Cases")
fig2.update_traces(textposition='outside', textfont_size=12)
fig2.update_layout(title=dict(x=0.5), yaxis=dict(categoryorder='total ascending'),
                   showlegend = False, legend_orientation="v")
#fig1.show()
#fig2.show()

fig000 = make_subplots(rows=1, cols=2, subplot_titles=("Provinces with Most Cases", "Cities with Most Cases"))
fig000.add_trace(fig1.data[0], row=1, col=1)
fig000.add_trace(fig2.data[0], row=1, col=2)
fig000.update_layout(yaxis1=dict(categoryorder='total ascending'), yaxis2=dict(categoryorder='total ascending'), height=500)
fig000.update_xaxes(range=(0, round(max(df_regres.Count) + max(df_regres.Count)/4)), row=1, col=1)
fig000.update_xaxes(range=(0, round(max(df_regres1.Count) + max(df_regres1.Count)/4)), row=1, col=2)
fig000.update_layout(xaxis={"domain": [0.05, 0.40]}, xaxis2={"domain": [0.6, 1]},
                     hoverlabel=dict(font_size=15))
fig000.show()
In [42]:
z1 = covid_ph[covid_ph.RegionRes.isnull()].shape[0]
z2 = covid_ph[covid_ph.ProvRes == 'For Validation'].shape[0]

print('Note:')
print('1. There are', z1, 'cases no information about residence.')
print('2. There are', z2, '('+str(round(z2/covid_ph.shape[0],1)*100)+'%'+')' , 'cases with region but no specific province/city of residence (tagged as "For Validation").')
Note:
1. There are 111 cases no information about residence.
2. There are 0 (0.0%) cases with region but no specific province/city of residence (tagged as "For Validation").
In [43]:
fig = px.treemap(df_regres,
                 path=['PH','ProvRes'],
                 values='Count',
                 hover_name='Percentage',
                 title='Case Count by Province of Residence',
                 branchvalues='total', height=700)
fig.update_layout(font_size=14)
fig.update_layout(hoverlabel=dict(font_size=15))
fig.show()

fig1 = px.treemap(df_regres1,
                 path=['PH','ProvRes','CityMunRes'],
                 values='Count',
                 hover_name='Percentage',
                 title='Case Count by City/Municipality of Residence',
                 branchvalues='total', height=700)

#fig1.data[0].textinfo = 'label+text+value'
#fig1.layout.hovermode = False
fig1.update_layout(font_size=14)
fig1.update_layout(hoverlabel=dict(font_size=14))
fig1.show()

a = covid_ph.RegionRes.isnull().sum()
b = round(((a / covid_ph.shape[0])*100),1).astype(str) +'%'
In [44]:
import requests
import json

# PHILIPPINES
geojson = json.loads(requests.get("https://raw.githubusercontent.com/macoymejia/geojsonph/master/Province/Provinces.minimal.json").text)
# NCR
geojson1 = json.loads(requests.get("https://raw.githubusercontent.com/macoymejia/geojsonph/master/Philippines/Luzon/Metropolitant%20Manila/MetropolitantManila.json").text)
In [45]:
x = []
for i in range(0, len(geojson['features'])):
    z = list(geojson['features'][i].values())[1]['PROVINCE']
    x.append(z)
p = pd.DataFrame(x).rename({0: 'Province'}, axis=1)

df_regres0 = df_regres.copy()
df_regres0['ProvRes'] = df_regres0['ProvRes'].str.title()
df_regres0['ProvRes'] = df_regres0['ProvRes'].replace({'Metro Manila': 'Metropolitan Manila',
                                                       'Davao Del Sur': 'Davao del Sur',
                                                       'Zamboanga Del Sur': 'Zamboanga del Sur',
                                                       'Davao Del Norte': 'Davao del Norte',
                                                       'Lanao Del Sur': 'Lanao del Sur',
                                                       'Lanao Del Norte': 'Lanao del Norte',
                                                       'Agusan Del Norte': 'Agusan del Norte',
                                                       'Cotabato': 'North Cotabato', 
                                                       'Cebu Province': 'Cebu', 
                                                       'Iloilo Province': 'Iloilo', 
                                                       'Tarlac Province': 'Tarlac'})

df_regres0 = df_regres0.reset_index(drop=True)

# CHECKER
prov = pd.merge(p, df_regres0, how='outer', left_on='Province', right_on='ProvRes')
prov['Count_log'] = np.log10(prov['Count'])
prov['Count_log'] = prov['Count_log'].fillna(0)
prov['Count'] = prov['Count'].fillna(0)
# prov.tail(10)

Geo Mapping

In [46]:
fig = px.choropleth(prov, geojson=geojson, color="Count_log",
                    locations="Province", featureidkey="properties.PROVINCE",
                    hover_name='Count',
                    projection="mercator", 
                    labels={'Count_log': 'log(Count)'}, 
                    title='Number of Covid-19 Confirmed Cases in the Philippines (as of ' + str(asof.date().strftime("%b %d, %Y")) + ")" , 
                    color_continuous_scale=px.colors.sequential.OrRd,  
                    height=1100)
fig.update_layout(hoverlabel=dict(font_size=20, bgcolor='white', bordercolor='red'))
fig.update_geos(fitbounds="locations", visible=False)
fig.show()
In [47]:
df_ncr = df_regres1[df_regres1['ProvRes'] == 'METRO MANILA'].reset_index(drop=True)
df_ncr['CityMunRes'] = df_ncr['CityMunRes'].replace({'QUEZON CITY': 'Quezon City',
                         'CITY OF MANILA': 'Manila', 
                         'CITY OF PARAÑAQUE': 'Parañaque',
                         'CITY OF MAKATI': 'Makati City', 
                         'CITY OF MANDALUYONG': 'Mandaluyong', 
                         'CITY OF PASIG': 'Pasig City', 
                         'TAGUIG CITY': 'Taguig', 
                         'CITY OF SAN JUAN': 'San Juan', 
                         'CALOOCAN CITY': 'Kalookan City',
                         'PASAY CITY': 'Pasay City', 
                         'CITY OF LAS PIÑAS': 'Las Piñas',
                         'CITY OF MUNTINLUPA': 'Muntinlupa', 
                         'CITY OF MARIKINA': 'Marikina', 
                         'CITY OF VALENZUELA': 'Valenzuela', 
                         'CITY OF MALABON': 'Malabon', 
                         'CITY OF NAVOTAS': 'Navotas', 
                         'PATEROS': 'Pateros'})

# CHECKER
z = []
for i in range(0, len(geojson1['features'])):
    y = list(geojson1['features'][i].values())[1]['NAME_2']
    z.append(y)
p = pd.DataFrame(z).rename({0: 'City'}, axis=1)
p

check = pd.merge(df_ncr, p, how='left', left_on='CityMunRes', right_on='City')
# check
In [48]:
fig = px.choropleth(df_ncr, geojson=geojson1, color="Count",
                    locations="CityMunRes", featureidkey="properties.NAME_2",
                    hover_name='Count',
                    projection="mercator", 
                    labels={'CityMunRes': 'City'}, 
                    title='Number of Covid-19 Confirmed Cases by NCR Cities (as of ' + str(asof.date().strftime("%b %d, %Y")) + ")" , 
                    color_continuous_scale=px.colors.sequential.OrRd, 
                    height=1100)
fig.update_layout(hoverlabel=dict(font_size=20, bgcolor='white', bordercolor='red'))
fig.update_geos(fitbounds="locations", visible=False)
fig.show()

Cumulative Cases By Area

Metro Manila, Cebu & Other Provinces (With Most Cases)

In [139]:
print("As of", asof.date().strftime("%b %d, %Y")) 
top = 10
topreg = df_regres.head(top).ProvRes.tolist()

df_regres_daily = covid_ph.groupby(['ProvRes', 'DateRepConf']).count()['CaseCode'].sort_values(ascending=False).reset_index()
df_regres_daily.sort_values(by=['ProvRes','DateRepConf'], inplace=True)
#df_regres_daily['ProvRes'] = df_regres_daily['ProvRes'].replace({'NCR': 'Metropolitan Manila'})
#df_regres_daily['Cum_count'] = df_regres_daily.groupby('ProvRes')['CaseCode'].cumsum()
df_regres_daily = df_regres_daily[df_regres_daily.ProvRes != 'For Validation']

df_regres_daily1 = pd.DataFrame()
for i in topreg:
    a = df_regres_daily[df_regres_daily.ProvRes == i].rename({'CaseCode': 'Count'}, axis=1)
    
    base = pd.DataFrame(covid_ph.DateRepConf.sort_values().unique())
    base.rename({0: 'DateRepConf'}, axis=1, inplace=True)
    # base = base[base.DateRepConf >= '2020-03-06']
    
    q = pd.merge(base, a, how='left', on='DateRepConf')
    q['ProvRes'] = i
    q['Count'] = q.Count.fillna(0)
    q['Cum_count'] = q.Count.cumsum()
#     q['Percent_change'] = q.Cum_count.pct_change()*100
    
    df_regres_daily1 = df_regres_daily1.append(q)

df_regres_daily1

# Figure 1
fig0 = px.line(df_regres_daily1, x='DateRepConf', y='Cum_count', 
               color='ProvRes', labels={'Cum_count': 'Cumulative Count'}, 
               title="Cumulative Case Count on Location with Most Cases")
fig0.update_layout(showlegend=True) 

a = pd.DataFrame(topreg).rename({0: 'RegRes'}, axis=1)
# for i in topreg[:2]:
#     yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
#     fig0.add_annotation(dict(xref="x", yref="y", showarrow=False,
#                          arrowhead=0, xanchor='left', yanchor='middle',
#                          ax=0, ay=0,
#                          x=df_regres_daily1.DateRepConf.iloc[-1],
#                          y=yyy,
#                          text=" "*1 + str(yyy.astype('int')),
#                          font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
#     fig0.add_annotation(dict(xref="x", yref="y", showarrow=False,
#                          arrowhead=0, xanchor='left', yanchor='middle',
#                          ax=0, ay=0,
#                          x=df_regres_daily1.DateRepConf.iloc[-1],
#                          y=yyy,
#                          text=" "*10 + i, #PLEASE EDIT IF NEEDED
#                          font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
fig0.show()

# Figure 2
fig1 = px.line(df_regres_daily1[~df_regres_daily1.ProvRes.isin(topreg[:2])], x='DateRepConf', y='Cum_count',
               color='ProvRes', labels={'Cum_count': 'Cumulative Count'}, 
               title="Cumulative Case Count on Location with Most Cases (excluding NCR & Cebu Province)", 
               color_discrete_sequence=px.colors.qualitative.Plotly[2:])
fig1.update_layout(showlegend=False) 

a = pd.DataFrame(topreg).rename({0: 'RegRes'}, axis=1)
# for i in topreg[1:2]:
#     yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
#     fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
#                          arrowhead=0, xanchor='left', yanchor='middle',
#                          ax=0, ay=0,
#                          x=df_regres_daily1.DateRepConf.iloc[-1],
#                          y=yyy+5,
#                          text=" "*1 + str(yyy.astype('int')),
#                          font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
#     fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
#                          arrowhead=0, xanchor='left', yanchor='middle',
#                          ax=0, ay=0,
#                          x=df_regres_daily1.DateRepConf.iloc[-1],
#                          y=yyy+5,
#                          text=" "*8 + i,
#                          font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
for i in topreg[2:5]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))

for i in topreg[5:6]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    
for i in topreg[6:7]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy+5,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy+5,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))

for i in topreg[7:8]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))

for i in topreg[8:9]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    
for i in topreg[9:10]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))       
# fig1.show()
As of May 26, 2020
In [50]:
fig1 = px.line(df_regres_daily1, x='DateRepConf', y='Cum_count', log_y=True, line_shape='spline',
               color='ProvRes', labels={'Cum_count': 'log(Cumulative Count)'}, 
               title="Cumulative Case Count on Location with Most Cases (Log Scale)")
fig1.update_layout(showlegend=True)
fig1.show()

NCR Cities

In [51]:
df_ncr = df_regres1[df_regres1['ProvRes'] == 'METRO MANILA']
topcity = df_ncr.CityMunRes.tolist()

df_city_daily = covid_ph.groupby(['CityMunRes', 'DateRepConf']).count()['CaseCode'].sort_values(ascending=False).reset_index()
df_city_daily.sort_values(by=['CityMunRes','DateRepConf'], inplace=True)

df_city_daily1 = pd.DataFrame()
for i in topcity:
    a = df_city_daily[df_city_daily.CityMunRes == i].rename({'CaseCode': 'Count'}, axis=1)
    
    base = pd.DataFrame(covid_ph.DateRepConf.sort_values().unique())
    base.rename({0: 'DateRepConf'}, axis=1, inplace=True)
    # base = base[base.DateRepConf >= '2020-03-06']
    
    q = pd.merge(base, a, how='left', on='DateRepConf')
    q['CityMunRes'] = i
    q['Count'] = q.Count.fillna(0)
    q['Cum_count'] = q.Count.cumsum()
#     q['Percent_change'] = q.Cum_count.pct_change()*100
    
    df_city_daily1 = df_city_daily1.append(q)

df_city_daily1

# Figure 1
fig0 = px.line(df_city_daily1, x='DateRepConf', y='Cum_count', 
               color='CityMunRes', labels={'Cum_count': 'Cumulative Count'}, 
               title="Cumulative Case Count in NCR Cities")
fig0.update_layout(showlegend=True) 
fig0.show()

# Figure 2
fig1 = px.line(df_city_daily1, x='DateRepConf', y='Cum_count', log_y=True,
               color='CityMunRes', labels={'Cum_count': 'log(Cumulative Count)'}, 
               title="Cumulative Case Count in NCR Cities (Log Scale)")
fig1.update_layout(showlegend=True) 
fig1.show()
# a = pd.DataFrame(topcity).rename({0: 'RegRes'}, axis=1)
# for i in topcity:
#     yyy = df_city_daily1[(df_city_daily1.CityMunRes == i) & (df_city_daily1.DateRepConf == pd.to_datetime(df_city_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
#     fig0.add_annotation(dict(xref="x", yref="y", showarrow=False,
#                          arrowhead=0, xanchor='left', yanchor='middle',
#                          ax=0, ay=0,
#                          x=df_city_daily1.DateRepConf.iloc[-1],
#                          y=yyy,
#                          text=" "*1 + str(yyy.astype('int')),
#                          font_color=px.colors.qualitative.Plotly[topcity.index(i)]))
#     fig0.add_annotation(dict(xref="x", yref="y", showarrow=False,
#                          arrowhead=0, xanchor='left', yanchor='middle',
#                          ax=0, ay=0,
#                          x=df_city_daily1.DateRepConf.iloc[-1],
#                          y=yyy,
#                          text=" "*12 + i,
#                          font_color=px.colors.qualitative.Plotly[topcity.index(i)]))

Daily Cases By Area

Metro Manila, Cebu & Other Provinces (With Most Cases)

Note: Trendline (in blue) is based on 7-day Moving Average.

In [52]:
# for i in topreg:
#     df = df_regres_daily1[df_regres_daily1.ProvRes == i].reset_index(drop=True)
#     cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
#     df['Trend_HP'] = trend1
#     df['Trend_MA7'] = df['Count'].rolling(7).mean()
#     df['Trend_MA3'] = df['Count'].rolling(3).mean()
    
#     fig1 = px.bar(df[df.ProvRes == i], x='DateRepConf', y='Count', color_discrete_sequence=['lightblue'])
#     fig1_trend = px.line(df[df.ProvRes == i], x='DateRepConf', y='Trend_MA7', color_discrete_sequence=["blue"])
#     fig1.add_trace(fig1_trend.data[0])
#     # fig1_trend1 = px.line(df[df.ProvRes == i], x='DateRepConf', y='Trend_MA3', color_discrete_sequence=["green"])
#     # fig1.add_trace(fig1_trend1.data[0])
#     fig1.update_yaxes(title="Case Count", tickfont_size=10)
#     fig1.update_layout(title=dict(x=0.5, text="Daily New Cases in " + f"<b>{i}</b>"), height=400,
#                        xaxis=dict(tickmode='linear', dtick=0, tickfont_size=9, title=None),
#                        legend=dict(orientation="v", title="",
#                                    x=0.05, y=0.8, yanchor="bottom"), 
#                        hoverlabel=dict(font_size=15))
#     fig1.show()
In [53]:
# zzz = df_regres.copy()
# zzz['ProvRes1'] = zzz['ProvRes'] + ": " + zzz['Count'].astype(str)
# topreg1 = zzz.head(10).ProvRes1.tolist()
# topreg1

fig000 = make_subplots(rows=5, cols=2, subplot_titles=topreg)

for i in topreg[::2]: # EVEN NUMBER
    df = df_regres_daily1[df_regres_daily1.ProvRes == i].reset_index(drop=True)
    cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
    df['Trend_HP'] = trend1
    df['Trend_MA7'] = df['Count'].rolling(7).mean()
    df['Trend_MA3'] = df['Count'].rolling(3).mean()
    df = df[df.DateRepConf >= '2020-03-01']
    
    fig1 = px.bar(df[df.ProvRes == i], x='DateRepConf', y='Count',
                  labels={'DateRepConf': 'Date'},
                  color_discrete_sequence=['lightblue'])
    fig1_trend = px.line(df[df.ProvRes == i], x='DateRepConf', y='Trend_MA7', line_shape='spline', color_discrete_sequence=["blue"])
    
    fig000.add_trace(fig1.data[0], row=topreg[::2].index(i)+1, col=1)
    fig000.add_trace(fig1_trend.data[0], row=topreg[::2].index(i)+1, col=1)
    
for i in topreg[1::2]: # EVEN NUMBER
    df = df_regres_daily1[df_regres_daily1.ProvRes == i].reset_index(drop=True)
    cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
    df['Trend_HP'] = trend1
    df['Trend_MA7'] = df['Count'].rolling(7).mean()
    df['Trend_MA3'] = df['Count'].rolling(3).mean()
    df = df[df.DateRepConf >= '2020-03-01']
    
    fig1 = px.bar(df[df.ProvRes == i], x='DateRepConf', y='Count', 
                  labels={'DateRepConf': 'Date'},
                  color_discrete_sequence=['lightblue'])
    fig1_trend = px.line(df[df.ProvRes == i], x='DateRepConf', y='Trend_MA7', line_shape='spline', color_discrete_sequence=["blue"])
    
    fig000.add_trace(fig1.data[0], row=topreg[1::2].index(i)+1, col=2)
    fig000.add_trace(fig1_trend.data[0], row=topreg[1::2].index(i)+1, col=2)

fig000.update_xaxes(tickfont_size=10, title="Date", titlefont_size=10)
fig000.update_yaxes(tickfont_size=10, title="Case Count", titlefont_size=10)
fig000.update_layout(title=dict(x=0.5, text="COVID-19 DAILY CASES IN PROVINCES"), showlegend=False, height=2000,
                     legend=dict(orientation="h", x=-0.05, y=-0.4, yanchor="bottom"), 
                     hoverlabel=dict(font_size=18))
    
fig000.show()
In [54]:
py.plot(fig000, filename='daily_cases_Provinces.html', auto_open=False)
Out[54]:
'daily_cases_Provinces.html'
In [55]:
# prov = ['TARLAC PROVINCE']
# fig000 = make_subplots(rows=5, cols=2, subplot_titles=prov)

# for i in prov: # EVEN NUMBER
#     df = df_regres_daily[df_regres_daily.ProvRes == i].reset_index(drop=True)
#     df = df.rename({'CaseCode': 'Count'}, axis=1)
#     cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
#     df['Trend_HP'] = trend1
#     df['Trend_MA7'] = df['Count'].rolling(7).mean()
#     df['Trend_MA3'] = df['Count'].rolling(3).mean()
#     df = df[df.DateRepConf >= '2020-03-01']
    
#     fig1 = px.bar(df[df.ProvRes == i], x='DateRepConf', y='Count',
#                   labels={'DateRepConf': 'Date'},
#                   color_discrete_sequence=['lightblue'])
#     fig1_trend = px.line(df[df.ProvRes == i], x='DateRepConf', y='Trend_HP', line_shape='spline', color_discrete_sequence=["blue"])
    
#     fig000.add_trace(fig1.data[0], row=prov[::2].index(i)+1, col=1)
#     fig000.add_trace(fig1_trend.data[0], row=prov[::2].index(i)+1, col=1)

# fig000.update_xaxes(tickfont_size=10, title="Date", titlefont_size=10)
# fig000.update_yaxes(tickfont_size=10, title="Case Count", titlefont_size=10)
# fig000.update_layout(title=dict(x=0.5), showlegend=False, height=2000,
#                      legend=dict(orientation="h", x=-0.05, y=-0.4, yanchor="bottom"), 
#                      hoverlabel=dict(font_size=18))

    
# fig000.show()

NCR Cities

Note: Trendline (in blue) is based on 7-day Moving Average.

In [56]:
# for i in topcity:
#     df = df_city_daily1[df_city_daily1.CityMunRes == i].reset_index(drop=True)
# #     cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
# #     df['Trend_HP'] = trend1
#     df['Trend_MA7'] = df['Count'].rolling(7).mean()
#     df['Trend_MA3'] = df['Count'].rolling(3).mean()
    
#     fig1 = px.bar(df[df.CityMunRes == i], x='DateRepConf', y='Count', color_discrete_sequence=["lightblue"])
#     fig1_trend = px.line(df[df.CityMunRes == i], x='DateRepConf', y='Trend_MA7', 
#                          color_discrete_sequence=["blue"])
#     fig1.add_trace(fig1_trend.data[0])
#     fig1.update_yaxes(title="Case Count", tickfont_size=10)
#     fig1.update_layout(title=dict(x=0.5, text="Daily New Cases in " + f"<b>{i}</b>"), height=400,
#                        xaxis=dict(tickmode='linear', dtick=0, tickfont_size=9, title=None),
#                        legend=dict(orientation="v", title="",
#                                    x=0.05, y=0.8, yanchor="bottom"), 
#                        hoverlabel=dict(font_size=15), hovermode="x")
#     fig1.show()
In [57]:
fig000 = make_subplots(rows=9, cols=2, subplot_titles=topcity)

for i in topcity[::2]: # ODD NUMBERS
    df = df_city_daily1[df_city_daily1.CityMunRes == i].reset_index(drop=True)
    cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
    df['Trend_HP'] = trend1
    df['Trend_MA7'] = df['Count'].rolling(7).mean()
    df['Trend_MA3'] = df['Count'].rolling(3).mean()
    df = df[df.DateRepConf >= '2020-03-01']
    
    fig1 = px.bar(df[df.CityMunRes == i], x='DateRepConf', y='Count',
                  labels={'DateRepConf': 'Date'},
                  color_discrete_sequence=['lightblue'])
    fig1_trend = px.line(df[df.CityMunRes == i], x='DateRepConf', y='Trend_MA7', line_shape='spline', 
                         color_discrete_sequence=["blue"])
    fig000.add_trace(fig1.data[0], row=topcity[::2].index(i)+1, col=1)
    fig000.add_trace(fig1_trend.data[0], row=topcity[::2].index(i)+1, col=1)
    
for i in topcity[1::2]: # EVEN NUMBERS
    df = df_city_daily1[df_city_daily1.CityMunRes == i].reset_index(drop=True)
    cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
    df['Trend_HP'] = trend1
    df['Trend_MA7'] = df['Count'].rolling(7).mean()
    df['Trend_MA3'] = df['Count'].rolling(3).mean()
    df = df[df.DateRepConf >= '2020-03-01']
    
    fig1 = px.bar(df[df.CityMunRes == i], x='DateRepConf', y='Count',
                  labels={'DateRepConf': 'Date'},
                  color_discrete_sequence=['lightblue'])
    fig1_trend = px.line(df[df.CityMunRes == i], x='DateRepConf', y='Trend_MA7', line_shape='spline',
                         color_discrete_sequence=["blue"])
    fig000.add_trace(fig1.data[0], row=topcity[1::2].index(i)+1, col=2)
    fig000.add_trace(fig1_trend.data[0], row=topcity[1::2].index(i)+1, col=2)

fig000.update_xaxes(tickfont_size=10, title="Date", titlefont_size=10)
fig000.update_yaxes(tickfont_size=10, title="Case Count", titlefont_size=10)
fig000.update_layout(title=dict(x=0.5, text="COVID-19 DAILY CASES in NCR"), showlegend=False, height=3200,
                     legend=dict(orientation="h", x=-0.05, y=-0.4, yanchor="bottom"), 
                     hoverlabel=dict(font_size=18))

    
fig000.show()
In [58]:
py.plot(fig000, filename='daily_cases_NCR.html', auto_open=False)
Out[58]:
'daily_cases_NCR.html'

Testing Capacity

In [59]:
df_tests_tot = df_tests.groupby('report_date')[['cumulative_unique_individuals',
                                                'cumulative_positive_individuals', 'cumulative_negative_individuals',
                                                'cumulative_samples_tested', 'remaining_available_tests', 'backlogs']].sum()
# df_tests_tot['invalid'] = df_tests_tot['cumulative_unique_individuals'] - (df_tests_tot['cumulative_positive_individuals'] + df_tests_tot['cumulative_negative_individuals'])
# df_tests_tot['%Invalid'] = 100 - (df_tests_tot['%Positive'] + df_tests_tot['%Negative'])
df_tests_tot['%Positive'] = 100*(df_tests_tot['cumulative_positive_individuals'] / df_tests_tot['cumulative_unique_individuals'])
df_tests_tot['%Negative'] = 100*(df_tests_tot['cumulative_negative_individuals'] / df_tests_tot['cumulative_unique_individuals'])
df_tests_tot['Total_Tests_Available'] = df_tests_tot['cumulative_samples_tested'] + df_tests_tot['remaining_available_tests']
df_tests_tot["backlogs"] = df_tests_tot["backlogs"].replace(0.0, '')
df_tests_tot = df_tests_tot.reset_index()
In [60]:
data = pd.melt(df_tests_tot[-1:], id_vars=['report_date'],
               value_vars=['cumulative_samples_tested', 'remaining_available_tests'],
               var_name='Test_Ratio', value_name = 'Count')
data['Test_Ratio'] = data['Test_Ratio'].replace({'remaining_available_tests': 'Remaining Available Tests Supplies', 
                                                 'cumulative_samples_tested': 'Samples Tested'})
data['Percent'] = (data.Count / data.Count.sum())*100
data['Pct'] = round(data['Percent'],1).astype('str') + '%'
data

q = 100*round(df_tests_tot[-1:].iloc[:,1].values[0] / df_tests_tot[-1:].iloc[:,4].values[0],3)
fig = px.bar(data, x='Percent', orientation='h', color='Test_Ratio', 
             text='Count', hover_name='Pct', height=250,
#              color_discrete_sequence=['#66c2a5', '#84dbc0'])
             color_discrete_sequence=['#64B0A2', '#C0D4CD'])
fig.update_layout(legend=dict(orientation='h', title=""), title="Availability of Test Kits")
fig.update_layout(showlegend=False, xaxis=dict(title=None), hoverlabel=dict(font_size=20))

fig.for_each_trace(lambda t: t.update(textfont_size=16))

for i in range(0, len(data.Percent.unique())):
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=data.Percent.cumsum().unique()[i]-0.001, 
                            y=-0.6,
                            text=data.Test_Ratio[i]), font_size=16,
                       font_color = ['#64B0A2', '#C0D4CD'][i])
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=data.Percent.cumsum().unique()[i]-0.001, 
                            y=0.6,
                            text=data.Pct[i]), font_size=16,
                       font_color = ['#64B0A2', '#C0D4CD'][i])
    
print("As of", df_tests.report_date.max().date().strftime("%b %d, %Y"))
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()
As of May 25, 2020
In [61]:
# Figure 0
data = pd.melt(df_tests_tot[-1:], id_vars=['report_date'],
               value_vars=['cumulative_negative_individuals', 'cumulative_positive_individuals'],
               var_name='Test_Ratio', value_name = 'Count')
data['Test_Ratio'] = data['Test_Ratio'].replace({'cumulative_negative_individuals': 'Negative Individuals', 
                                                 'cumulative_positive_individuals': 'Positive Individuals'})
data['Percent'] = (data.Count / data.Count.sum())*100
data['Pct'] = round(data['Percent'],1).astype('str') + '%'
data

q = round(df_tests_tot[-1:].iloc[:,1].values[0] / df_tests_tot[-1:].iloc[:,4].values[0]*100,1)
fig = px.bar(data, x='Percent', orientation='h', color='Test_Ratio', 
             text='Count', hover_name='Pct', height=250)
fig.update_layout(legend=dict(orientation='h', title="", font_size=16), showlegend=False, 
                  xaxis=dict(title=None), hoverlabel=dict(font_size=20),
                  title=("Unique Individuals Tested: " + f"<b>{df_tests_tot[-1:].iloc[:,1].values[0]}</b>" + 
                         " (" + q.astype(str) + "% of Total Samples Tested)"))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=16))
for i in range(0, len(data.Percent.unique())):
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=data.Percent.cumsum().unique()[i]-0.001, y=0.6,
                            text=data.Pct[i]), font_size=16,
                       font_color = px.colors.qualitative.Plotly[i])
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=data.Percent.cumsum().unique()[i]-0.001, y=-0.6,
                            text=['Negative', 'Positive'][i]), font_size=14,
                       font_color = px.colors.qualitative.Plotly[i])
    
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)

fig.show()
print("Note: There are more positive individuals tested vs publicly reported because of undergoing case validation and processing.")
Note: There are more positive individuals tested vs publicly reported because of undergoing case validation and processing.
In [62]:
df = pd.melt(df_tests_tot, id_vars=['report_date'],
#             value_vars=['%Negative', '%Positive'], 
             value_vars=['%Positive', '%Negative'], 
             var_name = 'Ratio', value_name = 'Percent')
fig = px.bar(df, x='report_date', y='Percent', color='Ratio', 
             color_discrete_sequence=list(reversed(px.colors.qualitative.Plotly[0:2])))
fig.update_layout(title=dict(x=0.5, text="Historical Testing Ratio on Unique Individuals"),
                  xaxis=dict(tickmode='linear', #tick0=4, 
                             dtick=0, tickfont_size=12, title="Date"),
                  yaxis=dict(tickfont_size=10),
                  legend=dict(orientation="h", title="",
                              x=0, y=0.97, 
                              yanchor="bottom"))

fig.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=True, showgrid=True, zeroline=False, showline=False)
fig.update_layout(hoverlabel=dict(font_size=20), hovermode='x')
fig.show()

print("Note: Testing ratio here is computed based on the total samples tested as of that date")
Note: Testing ratio here is computed based on the total samples tested as of that date
In [63]:
df_tests_tot1 = df_tests.groupby(['report_date', 'facility_name'])[['cumulative_unique_individuals',
                                                'cumulative_positive_individuals', 'cumulative_negative_individuals',
                                                'cumulative_samples_tested', 'remaining_available_tests', 'backlogs']].sum()
df_tests_tot1['%Positive'] = 100*(df_tests_tot1['cumulative_positive_individuals'] / df_tests_tot1['cumulative_unique_individuals'])
df_tests_tot1['%Negative'] = 100*(df_tests_tot1['cumulative_negative_individuals'] / df_tests_tot1['cumulative_unique_individuals'])
df_tests_tot1['Total_Tests_Available'] = df_tests_tot1['cumulative_samples_tested'] + df_tests_tot1['remaining_available_tests']
df_tests_tot1 = df_tests_tot1.reset_index()
df_test_fac = df_tests_tot1[df_tests_tot1.report_date == df_tests_tot1.report_date.max()].sort_values('cumulative_samples_tested', ascending=False).reset_index(drop=True)
df_test_fac['%Total'] = round(df_test_fac['cumulative_unique_individuals'] / df_test_fac['cumulative_unique_individuals'].sum() * 100, 1)
In [64]:
df_test_fac['Facility'] = 'Testing Facility'
fig = px.treemap(df_test_fac,
                 path=['Facility', 'facility_name'],
                 values='cumulative_unique_individuals',
                 hover_name='%Total',
                 title='Number of Samples Tested by Facility',
                 branchvalues='total', height=700)
fig.update_layout(hoverlabel=dict(font_size=18))
fig.show()
In [65]:
# df_tests_tot['NEW UNIQUE TESTS'] = df_tests_tot[df_tests_tot.columns[1]].diff()
# df_tests_tot['NEW POSITIVE'] = df_tests_tot['cumulative_positive_individuals'].diff()

df_tests_tot['NEW UNIQUE TESTS'] = df_tests.groupby('report_date').daily_output_samples_tested.sum().values
df_tests_tot['NEW POSITIVE'] = df_tests.groupby('report_date').daily_output_positive_individuals.sum().values
df_tests_tot['NEW %POSITIVE'] = (df_tests_tot['NEW POSITIVE']/df_tests_tot['NEW UNIQUE TESTS'])*100
df_tests_tot['TREND'] = df_tests_tot['NEW %POSITIVE'].rolling(7).mean()
df_tests_tot["type1"] = 'new unique tests'
df_tests_tot["type2"] = 'backlogs^'


fig = make_subplots(specs=[[{"secondary_y": True}]])

fig1 = px.bar(df_tests_tot, x='report_date', y='NEW UNIQUE TESTS', color='type1',
              color_discrete_sequence=["#00CC96"])

fig2 = px.line(df_tests_tot, x='report_date', y='%Positive', 
               color_discrete_sequence=["red"])
fig2.data[0].update(mode='markers+lines')

fig3 = px.bar(df_tests_tot, x='report_date', y='backlogs', color='type2',
               color_discrete_sequence=["blue"])
# fig3.data[0].update(mode='markers+lines')

fig.add_trace(fig1.data[0], secondary_y=False)
fig.add_trace(fig2.data[0], secondary_y=True)
fig.add_trace(fig3.data[0], secondary_y=False)

fig.update_yaxes(title="%Positive", tickfont_size=10, color='red', 
                 range=[0, df_tests_tot['%Positive'].max()*1.1], 
                 showgrid=False, secondary_y=True)
fig.update_yaxes(title='No. of Conducted Test', color='#00CC96',
                 #range=[0, df_tests_tot['UNIQUE INDIVIDUALS TESTED'].max()*1.5], 
                 showgrid=True, tickfont_size=12, secondary_y=False)

fig.update_layout(title=dict(x=0.5, text="%Positive* on Daily Conducted Tests"),
                  xaxis=dict(tickmode='linear', #tick0=4,
                             dtick=0, tickfont_size=10, title="Date"),
                  yaxis=dict(dtick=1000), hoverlabel=dict(font_size=20),
                  legend=dict(orientation="v", title="",
                              x=0.03, y=0.4, yanchor="bottom"))

# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
#                         xanchor='left', yanchor='middle', ax=0, ay=0,
#                         x=pd.to_datetime(df_tests_tot.Date.unique().max()), 
#                         y=df_tests_tot['%POSITIVE'][-1:].values[0],
#                         text=" "*2 + round(df_tests_tot['%POSITIVE'][-1:].values[0],1).astype('str')+'%', 
#                         font_size=14, font_color=px.colors.qualitative.Plotly[1]))

fig.show()

print('*Computed based on the ratio of cumulative positive cases over cumulative conducted tests.')
print('^These are number of specimens submitted in the laboratory with no results released. Backlog data only started on May 19.')
*Computed based on the ratio of cumulative positive cases over cumulative conducted tests.
^These are number of specimens submitted in the laboratory with no results released. Backlog data only started on May 19.
In [66]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig1 = px.bar(df_tests_tot, x='report_date', 
              # y='UNIQUE INDIVIDUALS TESTED',
              y='NEW UNIQUE TESTS',
              color_discrete_sequence=["#00CC96"])
fig2 = px.line(df_tests_tot, x='report_date', 
               y='NEW %POSITIVE', 
               color_discrete_sequence=["red"])
fig2.data[0].update(mode='markers+lines')

df_tests_tot['TREND_NAME'] = '%Positive Trend (7-day MA)'
fig_trend = px.line(df_tests_tot, x='report_date', 
               y='TREND', color='TREND_NAME',
               color_discrete_sequence=["red"])

fig.add_trace(fig1.data[0], secondary_y=False)
fig.add_trace(fig2.data[0], secondary_y=True)
fig.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}), secondary_y=True)

fig.update_yaxes(title="%Positive", tickfont_size=10, color='red',
                 range=[0, df_tests_tot['%Positive'].max()*1.1], 
                 showgrid=False, secondary_y=True)
fig.update_yaxes(title='No. of Conducted Test', color='#00CC96',
                 #range=[0, df_tests_tot['UNIQUE INDIVIDUALS TESTED'].max()*1.5], 
                 showgrid=True, tickfont_size=10, secondary_y=False)

fig.update_layout(title=dict(x=0.5, text="Estimated %Positive** on Daily Tests"),
                  xaxis=dict(tickmode='linear', #tick0=4,
                             dtick=0, tickfont_size=10, title="Date"),
                  yaxis=dict(dtick=1000), showlegend=True,
                  legend=dict(orientation="h", title="", 
                              x=0.6, y=1, yanchor="bottom"))

# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
#                         xanchor='left', yanchor='middle', ax=0, ay=0,
#                         x=pd.to_datetime(df_tests_tot.Date.unique().max()), 
#                         y=df_tests_tot['%POSITIVE'][-1:].values[0],
#                         text=" "*2 + round(df_tests_tot['%POSITIVE'][-1:].values[0],1).astype('str')+'%', 
#                         font_size=14, font_color=px.colors.qualitative.Plotly[1]))

fig.show()

print("**Computed based on the ratio of new positive cases reported over new conducted tests on that day.")
**Computed based on the ratio of new positive cases reported over new conducted tests on that day.

Time of Recovery / Death

In [67]:
df_recovered = covid_ph.loc[(covid_ph.RemovalType=='Recovered')].reset_index(drop=True)
df_died = covid_ph.loc[(covid_ph.RemovalType=='Died')].reset_index(drop=True)

df_recovered['Days'] = (df_recovered.DateRecover - df_recovered.DateRepConf).dt.days
df_died['Days'] = (df_died.DateDied - df_died.DateRepConf).dt.days

df = pd.concat([df_recovered, df_died])
fig = px.histogram(df, x='Days', color='RemovalType', opacity=0.8, 
                   marginal='box', barmode='overlay', 
                   labels={"RemovalType": "PatientType", "Days": "Days After Confirmation"}, 
                   title='Days of Recovery / Death after the Confirmation')
fig.update_layout(shapes=[dict(type= 'line', 
                               yref= 'paper', y0= 0, y1= 1,
                               xref= 'x', x0= 0, x1= 0)], 
                  legend=dict(orientation="h", title="",
                              x=0.0, y=0.65, yanchor="bottom"))

fig.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_recovered.Days.median()+2, y=0.87,
                        text=str(int(df_recovered.Days.median()))+' Days', font_size=11,
                        font_color = px.colors.qualitative.Plotly[0]))
fig.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_died.Days.median()+2, y=1,
                        text=str(int(df_died.Days.median()))+' Days', font_size=11,
                        font_color = px.colors.qualitative.Plotly[1]))

fig.show()

Most of these death cases probably needs to be validated if caused by Covid-19 before publicly reported.

Facility Capacity

In [68]:
f = pd.DataFrame()
for i in range(0, 17):
    ff = pd.read_csv("./data/nhfr/rfacilities2 (" + str(i) +").csv")
    ff.rename(columns={ff.columns[0]: "HealthFacilityCode"}, inplace = True)
    f = f.append(ff)
In [69]:
# f.to_excel("./data/ph_list_of_facilities.xlsx")
In [70]:
df_hosp1 = df_hosp.sort_values(by=['hfhudcode', 'reportdate'], ascending=[True, False]).reset_index(drop=True)
df_hosp2 = df_hosp1.groupby('hfhudcode').first().reset_index()
df_hosp2

df_hosp3 = pd.merge(df_hosp2, f.iloc[:, :15], how='left', left_on='hfhudcode', right_on='HealthFacilityCode')

# Categorizing Birthing Home as Infirmary
df_hosp3['Health Facility Type'] = df_hosp3['Health Facility Type'].replace({'Birthing Home': 'Infirmary'})

Total PH

In [71]:
data2 = df_hosp3.groupby(['Ownership Major Classification', 'Health Facility Type'])['id'].count().reset_index()
data2.columns = ['OwnershipType', 'FacilityType', 'Count']
data2['Percent'] = data2['Count']/data2['Count'].sum()*100
data2['Pct'] = round(data2['Percent'], 1).astype(str)+'%'
data2['Cum_percent'] = data2.Percent.cumsum()
data2['OFType'] = data2.OwnershipType + " " + data2.FacilityType

print("Hospital Data as of", df_hosp.reportdate.max())
kulay = ['rgb(27,158,119)', 'rgb(102,194,165)', 'rgb(217,95,2)', 'rgb(252,141,98)']
fig = px.bar(data2, x='Percent', orientation='h', color='OFType', 
             text='Count', hover_name='Pct', height=250, 
             labels={'Percent': " ", 'index':'HospitalType'}, 
             color_discrete_sequence=kulay)
fig.update_layout(legend=dict(orientation='h', title="", font_size=15.5), 
                  title=("Total Number of HealthCare Facilities: " + f"<b>{data2.Count.sum()}</b>" + "*"), 
                  hovermode=False)
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=20))
# for i in range(0, len(data2.Percent.unique())):
#     fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
#                             arrowhead=0, xanchor='right', yanchor='middle',
#                             ax=0, ay=0,
#                             x=data2.Cum_percent.unique()[i]-0.001, y=0.5,
#                             text=data2.Pct[i]), font_size=12, font_color = kulay[i])
    
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()

z = str(round(df_hosp3[df_hosp3['HealthFacilityCode'].isnull()].shape[0], 0))
z1 = str(round(df_hosp3[df_hosp3['HealthFacilityCode'].isnull()].shape[0]/df_hosp3.shape[0]*100,1))+'%'

print("Note:")
print("* Only the facilities who reported on the DataCollect App. Also, some hospitals & infirmaries don't have updated counts.")
print("* There are", z, "(", z1, ")", "facilities with data but no facility tagging from The National Health Facility Registry.")
Hospital Data as of 2020-05-25 00:00:00
Note:
* Only the facilities who reported on the DataCollect App. Also, some hospitals & infirmaries don't have updated counts.
* There are 10 ( 0.5% ) facilities with data but no facility tagging from The National Health Facility Registry.
In [72]:
data003 = pd.DataFrame()
for i in ['icu', 'isolbed', 'beds_ward', 'mechvent']:
    data3 = df_hosp3.groupby(['Ownership Major Classification', 
                              'Health Facility Type'])[[i+'_v', 
                                                        i+'_o']].sum().reset_index().rename({'Ownership Major Classification': 'OwnershipType', 
                                                                                             'Health Facility Type': 'FacilityType'}, axis=1)
    data03 = pd.melt(data3, id_vars=['OwnershipType', 'FacilityType'], 
                     value_vars=[i+'_v', i+'_o'], var_name=['Availability'], 
                     value_name='Count')
    data03['Availability'] = data03['Availability'].replace({i+'_v': 'vacant', i+'_o': 'occupied'})
    data03['Type'] = i
    data003 = data003.append(data03)
    
data003['Type'] = data003['Type'].replace({'icu': 'ICU Beds', 'isolbed': 'Isolation Beds', 
                                           'beds_ward': 'Ward Beds', 'mechvent': 'Mechanical Ventilators'})
data003 = data003.reset_index(drop=True)
data003['Percent'] = data003.groupby(['Type','OwnershipType','FacilityType'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data003['Pct'] = round(data003['Percent'],1).astype('str') + '%'
data003['Type'] = data003['Type'].replace({'Mechanical Ventilators': 'Mech Vents'})
data003

##############################################################################

# TOTAL
data100 = data003.groupby(['Type', 'Availability'])['Count'].sum().reset_index()
data100['Percent'] = data100.groupby(['Type'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data100['Pct'] = round(data100['Percent'],1).astype('str') + '%'
x = data100[data100.Type.isin(['ICU Beds', 'Isolation Beds', 'Ward Beds'])]
y = data100[data100.Type.isin(['Mech Vents'])]
data100 = pd.concat([x, y])
data100 = data100.sort_values(by='Availability', ascending=False)

fig = px.bar(data100, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count')
fig.update_layout(legend=dict(orientation='h', x=0.01, y=-0.1, title="", font_size=16), 
                  hoverlabel=dict(font_size=20),
                  title=("Availability of Beds & Mechanical Ventilators in <b>Total PH Facilities</b>"))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=18))
fig.update_xaxes(title=None, showticklabels=True, showgrid=False, zeroline=False, showline=False, side='bottom', tickfont_size=16)
fig.update_yaxes(title=None, showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_layout(shapes=[dict(type= 'line', yref= 'paper', y0= -0.05, y1= 0.95, xref= 'x', x0= 2.5, x1= 2.5, 
                               line=dict(color="gray", width=1, dash="dash"))])
for i in range(0, len(data003.Type.unique())):
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='center', yanchor='middle',
                            ax=0, ay=0,
                            x=i, 
                            y=-5,
                            text="Count: " + str(data003.groupby('Type').sum().Count[i])), font_size=12, font_color = "gray")
fig.show()

print("Note: These are beds dedicted exclusively for COVID-19 cases. Please interpret with caution, accuracy of data is not assured.")
Note: These are beds dedicted exclusively for COVID-19 cases. Please interpret with caution, accuracy of data is not assured.
Breakdown by Facility Type
In [73]:
# GOVERNMENT - HOSPITAL
data0011 = data003[(data003['OwnershipType'] == 'Government') & (data003['FacilityType'] == 'Hospital')].reset_index(drop=True)
fig3 = px.bar(data0011, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count', hover_data=None)
fig3.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""), 
                  hoverlabel=dict(font_size=20), 
                  title=("Availability of Beds & Mechanical Vents in <b>Government Facilities</b>"))
fig3.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig3.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig3.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig3.show()

# GOVERNMENT - INFIRMARIES
data0011 = data003[(data003['OwnershipType'] == 'Government') & (data003['FacilityType'] == 'Infirmary')].reset_index(drop=True)
fig4 = px.bar(data0011, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count', hover_data=None)
fig4.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""), 
                  hoverlabel=dict(font_size=20), 
                  title=("Availability of Beds & Mechanical Vents in <b>Private Facilities</b>"))
fig4.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig4.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig4.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig4.show()

# PRIVATE - HOSPITAL 
data0022 = data003[(data003['OwnershipType'] == 'Private') & (data003['FacilityType'] == 'Hospital')].reset_index(drop=True)
fig5 = px.bar(data0022, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count', hover_data=None)
fig5.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""), 
                  hoverlabel=dict(font_size=20), 
                  title=("Availability of Beds & Mechanical Vents in <b>Government Facilities</b>"))
fig5.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig5.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig5.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig5.show()

# PRIVATE -INFIRMARIES
data0022 = data003[(data003['OwnershipType'] == 'Private') & (data003['FacilityType'] == 'Infirmary')].reset_index(drop=True)
fig6 = px.bar(data0022, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count', hover_data=None)
fig6.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""), 
                  hoverlabel=dict(font_size=20), 
                  title=("Availability of Beds & Mechanical Vents in <b>Private Facilities</b>"))
fig6.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig6.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig6.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig6.show()

# SUBPLOTS
fig000 = make_subplots(rows=2, cols=2, subplot_titles=("Government Hospitals", "Government Infirmaries", 
                                                       "Private Hospitals", "Private Infirmaries"))
for loop in range(0, 2):
    fig000.add_trace(fig3.data[loop], row=1, col=1)
    fig000.add_trace(fig4.data[loop], row=1, col=2)
    fig000.add_trace(fig5.data[loop], row=2, col=1)
    fig000.add_trace(fig6.data[loop], row=2, col=2)
fig000.update_layout(barmode='stack', 
                     xaxis1=dict(tickmode='linear', dtick=0, tickfont_size=12),
                     xaxis2=dict(tickmode='linear', dtick=0, tickfont_size=12))
fig000.update_xaxes(title=None, side='bottom', tickmode='array', ticktext=['a', 'b', 'c', 'd'])
fig000.update_yaxes(title=None, showticklabels=False, showgrid=False, 
                    zeroline=False, showline=False)
fig000.update_layout(title=dict(x=0.5), showlegend=False, height=800,
                     legend=dict(orientation="h", x=-0.05, y=-0.4,
                                 yanchor="bottom"), hoverlabel=dict(font_size=20), 
                     xaxis_tickformat='<br>')
for i in range(0, len(data003.Type.unique())):
    fig000.add_annotation(dict(xref="x1", yref="y1", showarrow=False,
                               arrowhead=0, xanchor='center', yanchor='middle',
                               ax=0, ay=0, x=i, y=-5,
                               text="Count: " + str(int(fig3.data[0]['hovertext'][i]+fig3.data[1]['hovertext'][i])), 
                               font_size=11, font_color = "gray"))
    fig000.add_annotation(dict(xref="x2", yref="y2", showarrow=False,
                               arrowhead=0, xanchor='center', yanchor='middle',
                               ax=0, ay=0, x=i, y=-5,
                               text="Count: " + str(int(fig4.data[0]['hovertext'][i]+fig4.data[1]['hovertext'][i])), 
                               font_size=11, font_color = "gray"))
    fig000.add_annotation(dict(xref="x3", yref="y3", showarrow=False,
                               arrowhead=0, xanchor='center', yanchor='middle',
                               ax=0, ay=0, x=i, y=-5,
                               text="Count: " + str(int(fig5.data[0]['hovertext'][i]+fig5.data[1]['hovertext'][i])), 
                               font_size=11, font_color = "gray"))
    fig000.add_annotation(dict(xref="x4", yref="y4", showarrow=False,
                               arrowhead=0, xanchor='center', yanchor='middle',
                               ax=0, ay=0, x=i, y=-5,
                               text="Count: " + str(int(fig6.data[0]['hovertext'][i]+fig6.data[1]['hovertext'][i])), 
                               font_size=11, font_color = "gray"))

fig000.show()

print("Note: The graphs above only shows utilization in facilities that have submitted to the DOHDataCollect App only.")
Note: The graphs above only shows utilization in facilities that have submitted to the DOHDataCollect App only.

In NCR Only

In [74]:
df_hosp3_ncr = df_hosp3[df_hosp3['Region Name'] == 'NATIONAL CAPITAL REGION (NCR)'].reset_index(drop=True)

data0033 = pd.DataFrame()
for i in ['icu', 'isolbed', 'beds_ward', 'mechvent']:
    data33 = df_hosp3_ncr.groupby(['Ownership Major Classification', 
                                  'Health Facility Type'])[[i+'_v', 
                                                            i+'_o']].sum().reset_index().rename({'Ownership Major Classification': 'OwnershipType', 
                                                                                                 'Health Facility Type': 'FacilityType'}, axis=1)
    data033 = pd.melt(data33, id_vars=['OwnershipType', 'FacilityType'], 
                     value_vars=[i+'_v', i+'_o'], var_name=['Availability'], 
                     value_name='Count')
    data033['Availability'] = data033['Availability'].replace({i+'_v': 'vacant', i+'_o': 'occupied'})
    data033['Type'] = i
    data0033 = data0033.append(data033)
    
data0033['Type'] = data0033['Type'].replace({'icu': 'ICU Beds', 'isolbed': 'Isolation Beds', 
                                           'beds_ward': 'Ward Beds', 'mechvent': 'Mechanical Ventilators'})
data0033 = data0033.reset_index(drop=True)
data0033['Percent'] = data0033.groupby(['Type','OwnershipType','FacilityType'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data0033['Pct'] = round(data0033['Percent'],1).astype('str') + '%'
data0033['Type'] = data003['Type'].replace({'Mechanical Ventilators': 'Mech Vents'})
data0033

##############################################################################

# TOTAL
data100 = data0033.groupby(['Type', 'Availability'])['Count'].sum().reset_index()
data100['Percent'] = data100.groupby(['Type'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data100['Pct'] = round(data100['Percent'],1).astype('str') + '%'
x = data100[data100.Type.isin(['ICU Beds', 'Isolation Beds', 'Ward Beds'])]
y = data100[data100.Type.isin(['Mech Vents'])]
data100 = pd.concat([x, y])
data100 = data100.sort_values(by='Availability', ascending=False)

fig = px.bar(data100, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count')
fig.update_layout(legend=dict(orientation='h', x=0.01, y=-0.1, title="", font_size=16), 
                  hoverlabel=dict(font_size=20),
                  title=("Availability of Beds & Mechanical Ventilators in <b>NCR Facilities</b>"))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=18))
fig.update_xaxes(title=None, showticklabels=True, showgrid=False, zeroline=False, showline=False, side='bottom', tickfont_size=16)
fig.update_yaxes(title=None, showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_layout(shapes=[dict(type= 'line', yref= 'paper', y0= -0.05, y1= 0.95, xref= 'x', x0= 2.5, x1= 2.5, 
                               line=dict(color="gray", width=1, dash="dash"))])
for i in range(0, len(data003.Type.unique())):
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='center', yanchor='middle',
                            ax=0, ay=0,
                            x=i, 
                            y=-5,
                            text="Count: " + str(data0033.groupby('Type').sum().Count[i])), font_size=12, font_color = "gray")
fig.show()
In [75]:
# GOVERNMENT - HOSPITAL
data00111 = data0033[(data0033['OwnershipType'] == 'Government') & (data0033['FacilityType'] == 'Hospital')]
fig3 = px.bar(data00111, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count', hover_data=None)
fig3.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""), 
                  hoverlabel=dict(font_size=20), 
                  title=("Availability of Beds & Mechanical Vents in <b>Government Facilities</b>"))
fig3.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig3.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig3.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig3.show()

# GOVERNMENT - INFIRMARIES
data00112 = data0033[(data0033['OwnershipType'] == 'Government') & (data0033['FacilityType'] == 'Infirmary')]
fig4 = px.bar(data00112, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count', hover_data=None)
fig4.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""), 
                  hoverlabel=dict(font_size=20), 
                  title=("Availability of Beds & Mechanical Vents in <b>Private Facilities</b>"))
fig4.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig4.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig4.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig4.show()

# PRIVATE - HOSPITAL 
data00221 = data0033[(data0033['OwnershipType'] == 'Private') & (data0033['FacilityType'] == 'Hospital')]
fig5 = px.bar(data00221, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count', hover_data=None)
fig5.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""), 
                  hoverlabel=dict(font_size=20), 
                  title=("Availability of Beds & Mechanical Vents in <b>Government Facilities</b>"))
fig5.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig5.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig5.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig5.show()

# PRIVATE -INFIRMARIES
data00222 = data0033[(data0033['OwnershipType'] == 'Private') & (data0033['FacilityType'] == 'Infirmary')]
fig6 = px.bar(data00222, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count', hover_data=None)
fig6.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""), 
                  hoverlabel=dict(font_size=20), 
                  title=("Availability of Beds & Mechanical Vents in <b>Private Facilities</b>"))
fig6.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig6.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig6.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig6.show()

# SUBPLOTS
fig000 = make_subplots(rows=2, cols=2, subplot_titles=("Government Hospitals", "Government Infirmaries", 
                                                       "Private Hospitals", "Private Infirmaries"))
for loop in range(0, 2):
    fig000.add_trace(fig3.data[loop], row=1, col=1)
    fig000.add_trace(fig4.data[loop], row=1, col=2)
    fig000.add_trace(fig5.data[loop], row=2, col=1)
    fig000.add_trace(fig6.data[loop], row=2, col=2)
fig000.update_layout(barmode='stack', 
                     xaxis1=dict(tickmode='linear', dtick=0, tickfont_size=12),
                     xaxis2=dict(tickmode='linear', dtick=0, tickfont_size=12))
fig000.update_xaxes(title=None, side='bottom', tickmode='array', ticktext=['a', 'b', 'c', 'd'])
fig000.update_yaxes(title=None, showticklabels=False, showgrid=False, 
                    zeroline=False, showline=False)
fig000.update_layout(title=dict(x=0.5), showlegend=False, height=800,
                     legend=dict(orientation="h", x=-0.05, y=-0.4,
                                 yanchor="bottom"), hoverlabel=dict(font_size=20), 
                     xaxis_tickformat='<br>')
for i in range(0, len(data0033.Type.unique())):
    fig000.add_annotation(dict(xref="x1", yref="y1", showarrow=False,
                               arrowhead=0, xanchor='center', yanchor='middle',
                               ax=0, ay=0, x=i, y=-5,
                               text="Count: " + str(int(fig3.data[0]['hovertext'][i]+fig3.data[1]['hovertext'][i])), 
                               font_size=11, font_color = "gray"))
    fig000.add_annotation(dict(xref="x2", yref="y2", showarrow=False,
                               arrowhead=0, xanchor='center', yanchor='middle',
                               ax=0, ay=0, x=i, y=-5,
                               text="Count: " + str(int(fig4.data[0]['hovertext'][i]+fig4.data[1]['hovertext'][i])), 
                               font_size=11, font_color = "gray"))
    fig000.add_annotation(dict(xref="x3", yref="y3", showarrow=False,
                               arrowhead=0, xanchor='center', yanchor='middle',
                               ax=0, ay=0, x=i, y=-5,
                               text="Count: " + str(int(fig5.data[0]['hovertext'][i]+fig5.data[1]['hovertext'][i])), 
                               font_size=11, font_color = "gray"))
    fig000.add_annotation(dict(xref="x4", yref="y4", showarrow=False,
                               arrowhead=0, xanchor='center', yanchor='middle',
                               ax=0, ay=0, x=i, y=-5,
                               text="Count: " + str(int(fig6.data[0]['hovertext'][i]+fig6.data[1]['hovertext'][i])), 
                               font_size=11, font_color = "gray"))

fig000.show()

print("Note: The graphs above only shows utilization in facilities that have submitted to the DOHDataCollect App only.")
Note: The graphs above only shows utilization in facilities that have submitted to the DOHDataCollect App only.
In [76]:
# df_ppe1 = df_ppe.sort_values(by=['hfhudcode', 'reportdate'], ascending=[True, False]).reset_index(drop=True)
# df_ppe2 = df_ppe1.groupby('hfhudcode').first().reset_index()
# df_ppe2

# print("Availability of PPEs in Different Hospitals:")
# print("- Coverall:", df_ppe2.coverall.sum())
# print("- Face Shield:", df_ppe2.face_shield.sum())
# print("- Goggles:", df_ppe2.goggles.sum())
# print("- Gown:", df_ppe2.gown.sum())
# print("- N95 Mask:", df_ppe2.n95mask.sum())
# print("- Shoe Cover:", df_ppe2.shoe_cover.sum())
# print(" ")
# print("- Head Cover:", df_ppe2.head_cover.sum())
# print("- Surgical Mask:", df_ppe2.surgmask.sum())
# print("- Gloves:", df_ppe2.gloves.sum())

R(t) Estimation using Bayesian

Main Reference: Kevin Systrom

In any epidemic, $R_t$ is the measure known as the effective reproduction number. It can be simply defined as the number of people who can become infected per infectious person at time $t$. The most well-known version of this number is the basic reproduction number: $R_0$ when $t=0$. However, $R_0$ is a single measure that does not adapt with changes in behavior and restrictions.

As a pandemic evolves, increasing restrictions (or potential releasing of restrictions) changes $R_t$. Knowing the current $R_t$ is essential. When $R\gg1$, the pandemic will spread through a large part of the population. If $R_t$ $<1$, the pandemic will slow quickly before it has a chance to infect many people. The lower the $R_t$: the more manageable the situation.

Given this, what we want to see is $R_t$ of less than 1 which means that the pandemic in an area is under control.

In [77]:
def prepare_cases(cases):
    new_cases = cases.diff()

    smoothed = new_cases.rolling(7,
        win_type='gaussian',
        min_periods=1,
        center=True).mean(std=2).round()
    
    zeros = smoothed.index[smoothed.eq(0)]
    if len(zeros) == 0:
        idx_start = 0
    else:
        last_zero = zeros.max()
        idx_start = smoothed.index.get_loc(last_zero) + 1
    smoothed = smoothed.iloc[idx_start:]
    original = new_cases.loc[smoothed.index]
    
    return original, smoothed

Note that we will start the computation on March 5, 2020 - the date of the last zero new case. To derive the trend, Gaussian filter was applied to arrive with a smoothed time series. This will be the basis in calculating for each days' likelihood and posteriors as part of the Bayesian approach in estimating $R_t$. To know more about the methodology, you may check out the work of Kevin Systrom.

In [78]:
# cases = cases_daily.cumsum()
cases = cases_daily[35:].cumsum()
original, smoothed = prepare_cases(cases)

# original.plot(title=f"PH Daily New Cases",
#               c='k',
#               linestyle=':',
#               alpha=.5, 
#               label='Actual',
#               legend=True,
#               figsize=(1200/72, 600/72))

# ax = smoothed.plot(label='Smoothed', color='#0084ff', legend=True)
# ax.get_figure().set_facecolor('w')
In [79]:
from scipy import stats as sps

def get_posteriors(sr, sigma=0.15):

    # (1) Calculate Lambda
    lam = sr[:-1].values * np.exp(GAMMA * (r_t_range[:, None] - 1))

    # (2) Calculate each day's likelihood
    likelihoods = pd.DataFrame(
        data = sps.poisson.pmf(sr[1:].values, lam),
        index = r_t_range,
        columns = sr.index[1:])
    
    # (3) Create the Gaussian Matrix
    process_matrix = sps.norm(loc=r_t_range,
                              scale=sigma
                             ).pdf(r_t_range[:, None]) 

    # (3a) Normalize all rows to sum to 1
    process_matrix /= process_matrix.sum(axis=0)
    
    # (4) Calculate the initial prior
    prior0 = sps.gamma(a=4).pdf(r_t_range)
    prior0 /= prior0.sum()

    # Create a DataFrame that will hold our posteriors for each day
    # Insert our prior as the first posterior.
    posteriors = pd.DataFrame(
        index=r_t_range,
        columns=sr.index,
        data={sr.index[0]: prior0}
    )
    
    # We said we'd keep track of the sum of the log of the probability
    # of the data for maximum likelihood calculation.
    log_likelihood = 0.0

    # (5) Iteratively apply Bayes' rule
    for previous_day, current_day in zip(sr.index[:-1], sr.index[1:]):

        #(5a) Calculate the new prior
        current_prior = process_matrix @ posteriors[previous_day]
        
        #(5b) Calculate the numerator of Bayes' Rule: P(k|R_t)P(R_t)
        numerator = likelihoods[current_day] * current_prior
        
        #(5c) Calcluate the denominator of Bayes' Rule P(k)
        denominator = np.sum(numerator)
        
        # Execute full Bayes' Rule
        posteriors[current_day] = numerator/denominator
        
        # Add to the running sum of log likelihoods
        log_likelihood += np.log(denominator)
    
    return posteriors, log_likelihood

# We create an array for every possible value of Rt
R_T_MAX = 12
r_t_range = np.linspace(0, R_T_MAX, R_T_MAX*100+1)

# Gamma is 1/serial interval
# https://wwwnc.cdc.gov/eid/article/26/7/20-0282_article
# https://www.nejm.org/doi/full/10.1056/NEJMoa2001316
GAMMA = 1/7

# Note that we're fixing sigma to a value just for the example
posteriors, log_likelihood = get_posteriors(smoothed, sigma=.15)
In [80]:
# ax = posteriors.plot(title=f'Daily Posterior for $R_t$',
#                      legend=False, 
#                      lw=1, c='k', alpha=.3,
#                      figsize=(1200/72, 800/72), 
#                      xlim=(0.4,4))

# ax.set_xlabel('$R_t$')

Next, since the results include uncertainty, we'd view the most likely value of $R_t$ along with its highest-density interval (HDI). HDI simply indicates which points of a distribution are most credible, and which cover most of the distribution. It can be compared to a confidence interval where an estimate will mostly like falls on a range given a probability ($p$). For this analysis, we will be defining the High HDI and Low HDI given $p=0.9$.

In [81]:
def highest_density_interval(pmf, p=.9):
    # If we pass a DataFrame, just call this recursively on the columns
    if(isinstance(pmf, pd.DataFrame)):
        return pd.DataFrame([highest_density_interval(pmf[col], p=p) for col in pmf],
                            index=pmf.columns)
    
    cumsum = np.cumsum(pmf.values)
    best = None
    for i, value in enumerate(cumsum):
        for j, high_value in enumerate(cumsum[i+1:]):
            if (high_value-value > p) and (not best or j<best[1]-best[0]):
                best = (i, i+j+1)
                break
            
    low = pmf.index[best[0]]
    high = pmf.index[best[1]]
    return pd.Series([low, high], index=[f'Low_{p*100:.0f}', f'High_{p*100:.0f}'])
In [82]:
# Note that this takes a while to execute - it's not the most efficient algorithm
hdis = highest_density_interval(posteriors, p=.9)

most_likely = posteriors.idxmax().rename('Estimated Rt')

# Look into why you shift -1
result = pd.concat([most_likely, hdis], axis=1)

# result.tail()
In [83]:
# fig, ax = plt.subplots(figsize=(16, 8))
# most_likely.plot(marker='o',
#                  label='Most Likely',
#                  c='k',
#                  zorder=2,
#                  markersize=4,
#                  ax=ax)

# ax.fill_between(hdis.index,
#                 hdis['Low_90'],
#                 hdis['High_90'],
#                 color='k',
#                 zorder=1,
#                 alpha=.1,
#                 lw=0,
#                 label='Highest Density Interval')

# ax.axhline(1, linestyle='--')
# ax.axvline(most_likely.index[11], linewidth=2,
#            zorder=0, color='#fa3c4c',
#            label=f'Start of Lockdown: \n{str(most_likely.index[11].date())}')

# ax.axvline(most_likely.index[40], linewidth=2,
#            zorder=0, color='#44bec7',
#            label=f'Start of Progressive Mass Testing (?): \n{str(most_likely.index[40].date())}')

# ax.set_xlabel('Date', size=13)
# ax.set_ylabel('Effective Reproduction (Rt)', size=13)
# ax.set_title(f'$R_t$ by day', size=15, y=1.02)
# ax.legend();
In [84]:
fig = px.line(most_likely, y=most_likely.values, x=most_likely.index)
fig.update_xaxes(title='Date')
fig.update_yaxes(title='Effective Reproduction - R(t)')

fig.update_layout(title=dict(x=0.5, text='PH Estimated R(t) Based on New Daily Cases'), 
                  showlegend=True,
                  legend=dict(orientation="v", title="",
                              x=0.8, y=0.95, yanchor="top"), 
                  hoverlabel=dict(font_size=18))

fig.add_scatter(x=hdis['Low_90'].index, y=hdis['Low_90'].values, 
                fill='tonexty', line=dict(color='lightgray'), name='HDI (Low @ 90%)')
fig.add_scatter(x=hdis['High_90'].index, y=hdis['High_90'].values, 
                fill='tonexty', line=dict(color='lightgray'), name='HDI (High @ 90%)')
fig.add_scatter(y=most_likely.values, x=most_likely.index, line=dict(color='blue'), name='<b>R(t)</b>')
fig.add_scatter(y=[1]*most_likely.index.shape[0], x=most_likely.index, 
                line=dict(color='orange', dash='dash'), name='R(t) = 1')

fig.show()
In [85]:
# results_prov = pd.DataFrame()

# for i in topreg[:7]:
#     df0 = df_regres_daily1[df_regres_daily1['ProvRes'] == i] 
#     df = df0[['DateRepConf', 'Cum_count']][df0.DateRepConf >= '2020-03-05']
#     df1 = df.set_index('DateRepConf')
#     df1 = df1.iloc[:,0].astype('int64')
    
#     idx = pd.date_range('2020-03-05', df1.index[-1].date())
#     df1.index = pd.DatetimeIndex(df1.index)
#     df1 = df1.reindex(idx, fill_value=0)
#     df1

#     original, smoothed = prepare_cases(df1)

#     posteriors, log_likelihood = get_posteriors(smoothed, sigma=.15)

#     # Note that this takes a while to execute - it's not the most efficient algorithm
#     hdis = highest_density_interval(posteriors, p=.9)
#     most_likely = posteriors.idxmax().rename('Estimated Rt')

#     # Look into why you shift -1
#     result_prov = pd.concat([most_likely, hdis], axis=1)
#     result_prov['ProvRes'] = i
#     results_prov = results_prov.append(result_prov)
In [86]:
# fig000 = make_subplots(rows=4, cols=2, subplot_titles=topreg[:8])

# for i in topreg[:8][::2]: # EVEN NUMBER
#     fig = px.line(results_prov[results_prov.ProvRes == i]['Estimated Rt'], 
#                   y=results_prov[results_prov.ProvRes == i]['Estimated Rt'].values, 
#                   x=results_prov[results_prov.ProvRes == i]['Estimated Rt'].index)
#     fig.update_xaxes(title='Date')
#     fig.update_yaxes(title='Effective Reproduction - R(t)')

#     fig000.add_trace(fig.data[0], row=topreg[:8][::2].index(i)+1, col=1)
#     fig000.add_scatter(x=results_prov[results_prov.ProvRes == i]['High_90'].index, 
#                     y=results_prov[results_prov.ProvRes == i]['High_90'].values, 
#                     fill='tonexty', fillcolor='lightgray', line=dict(color='lightgray'), name='HDI - High 90%',
#                       row=topreg[:8][::2].index(i)+1, col=1)
#     fig000.add_scatter(x=results_prov[results_prov.ProvRes == i]['Low_90'].index, 
#                     y=results_prov[results_prov.ProvRes == i]['Low_90'].values, 
#                     fill='tonexty', fillcolor='lightgray',  line=dict(color='lightgray'), name='HDI - Low 90%',
#                       row=topreg[:8][::2].index(i)+1, col=1)
#     fig000.add_scatter(y=results_prov[results_prov.ProvRes == i]['Estimated Rt'].values, 
#                     x=results_prov[results_prov.ProvRes == i]['Estimated Rt'].index, 
#                     line=dict(color='blue'), name='R(t)', 
#                        row=topreg[:8][::2].index(i)+1, col=1)
#     fig000.add_scatter(y=[1]*results_prov[results_prov.ProvRes == i]['Estimated Rt'].index.shape[0],
#                        x=results_prov[results_prov.ProvRes == i]['Estimated Rt'].index, 
#                        line=dict(color='orange', dash='dash'), name='R(t)', 
#                        row=topreg[:8][::2].index(i)+1, col=1)
    
# for i in topreg[:8][1::2]: # ODD NUMBER
#     fig = px.line(results_prov[results_prov.ProvRes == i]['Estimated Rt'], 
#                   y=results_prov[results_prov.ProvRes == i]['Estimated Rt'].values, 
#                   x=results_prov[results_prov.ProvRes == i]['Estimated Rt'].index)
#     fig.update_xaxes(title='Date')
#     fig.update_yaxes(title='Effective Reproduction - R(t)')

#     fig000.add_trace(fig.data[0], row=topreg[:8][1::2].index(i)+1, col=2)
#     fig000.add_scatter(x=results_prov[results_prov.ProvRes == i]['High_90'].index, 
#                     y=results_prov[results_prov.ProvRes == i]['High_90'].values, 
#                     fill='tonexty', fillcolor='lightgray', line=dict(color='lightgray'), name='HDI - High 90%',
#                       row=topreg[:8][1::2].index(i)+1, col=2)
#     fig000.add_scatter(x=results_prov[results_prov.ProvRes == i]['Low_90'].index, 
#                     y=results_prov[results_prov.ProvRes == i]['Low_90'].values, 
#                     fill='tonexty', fillcolor='lightgray',  line=dict(color='lightgray'), name='HDI - Low 90%',
#                       row=topreg[:8][1::2].index(i)+1, col=2)
#     fig000.add_scatter(y=results_prov[results_prov.ProvRes == i]['Estimated Rt'].values, 
#                     x=results_prov[results_prov.ProvRes == i]['Estimated Rt'].index, 
#                     line=dict(color='blue'), name='R(t)', 
#                        row=topreg[:8][1::2].index(i)+1, col=2)
#     fig000.add_scatter(y=[1]*results_prov[results_prov.ProvRes == i]['Estimated Rt'].index.shape[0],
#                        x=results_prov[results_prov.ProvRes == i]['Estimated Rt'].index, 
#                        line=dict(color='orange', dash='dash'), name='R(t)', 
#                        row=topreg[:8][1::2].index(i)+1, col=2)

# fig000.update_xaxes(tickfont_size=10, title="Date", titlefont_size=10)
# fig000.update_yaxes(tickfont_size=10, title="R(t)", titlefont_size=10)
# fig000.update_layout(title=dict(x=0.5, text="Estimated R(t) by Provinces With Most Cases"), 
#                      showlegend=False, height=1600,
#                      legend=dict(orientation="h", x=-0.05, y=-0.4, yanchor="bottom"), 
#                      hoverlabel=dict(font_size=18))
    
# fig000.show()
In [87]:
# def prepare_cases(cases):
#     new_cases = cases.diff()

#     smoothed = new_cases.rolling(7,
#         win_type='gaussian',
#         min_periods=1,
#         center=True).mean(std=2).round()
    
#     zeros = smoothed.index[smoothed.eq(0)]
#     if len(zeros) == 0:
#         idx_start = 0
#     else:
#         last_zero = zeros.max()
#         idx_start = smoothed.index.get_loc(last_zero) + 1
#     smoothed = smoothed.iloc[idx_start:]
#     original = new_cases.loc[smoothed.index]
    
#     return original, smoothed
In [88]:
# results_ncr = pd.DataFrame()
# for i in topcity[:-4]:
#     df0 = df_city_daily1[df_city_daily1['CityMunRes'] == i] 
#     df = df0[['DateRepConf', 'Cum_count']][df0.DateRepConf >= '2020-03-05']
#     df1 = df.set_index('DateRepConf')
#     df1 = df1.iloc[:,0].astype('int64')
    
#     idx = pd.date_range('2020-03-05', df1.index[-1].date())
#     df1.index = pd.DatetimeIndex(df1.index)
#     df1 = df1.reindex(idx, fill_value=0)
#     df1

#     original, smoothed = prepare_cases(df1)

#     posteriors, log_likelihood = get_posteriors(smoothed, sigma=.15)

#     # Note that this takes a while to execute - it's not the most efficient algorithm
#     hdis = highest_density_interval(posteriors, p=.9)
#     most_likely = posteriors.idxmax().rename('Estimated Rt')

#     # Look into why you shift -1
#     result_ncr = pd.concat([most_likely, hdis], axis=1)
#     result_ncr['CityMunRes'] = i
#     results_ncr = results_ncr.append(result_ncr)
In [89]:
# fig000 = make_subplots(rows=7, cols=2, subplot_titles=topcity[:-3])

# for i in topcity[:-3][::2]: # EVEN NUMBER
#     fig = px.line(results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'], 
#                   y=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].values, 
#                   x=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index)
#     fig.update_xaxes(title='Date')
#     fig.update_yaxes(title='Effective Reproduction - R(t)')

#     fig000.add_trace(fig.data[0], row=topcity[:-3][::2].index(i)+1, col=1)
#     fig000.add_scatter(x=results_ncr[results_ncr.CityMunRes == i]['High_90'].index, 
#                     y=results_ncr[results_ncr.CityMunRes == i]['High_90'].values, 
#                     fill='tonexty', fillcolor='lightgray', line=dict(color='lightgray'), name='HDI - High 90%',
#                       row=topcity[:-3][::2].index(i)+1, col=1)
#     fig000.add_scatter(x=results_ncr[results_ncr.CityMunRes == i]['Low_90'].index, 
#                     y=results_ncr[results_ncr.CityMunRes == i]['Low_90'].values, 
#                     fill='tonexty', fillcolor='lightgray',  line=dict(color='lightgray'), name='HDI - Low 90%',
#                       row=topcity[:-3][::2].index(i)+1, col=1)
#     fig000.add_scatter(y=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].values, 
#                     x=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index, 
#                     line=dict(color='blue'), name='R(t)', 
#                        row=topcity[:-3][::2].index(i)+1, col=1)
#     fig000.add_scatter(y=[1]*results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index.shape[0],
#                        x=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index, 
#                        line=dict(color='orange', dash='dash'), name='R(t)', 
#                        row=topcity[:-3][::2].index(i)+1, col=1)
    
# for i in topcity[:-3][1::2]: # EVEN NUMBER
#     fig = px.line(results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'], 
#                   y=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].values, 
#                   x=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index)
#     fig.update_xaxes(title='Date')
#     fig.update_yaxes(title='Effective Reproduction - R(t)')

#     fig000.add_trace(fig.data[0], row=topcity[:-3][1::2].index(i)+1, col=2)
#     fig000.add_scatter(x=results_ncr[results_ncr.CityMunRes == i]['High_90'].index, 
#                     y=results_ncr[results_ncr.CityMunRes == i]['High_90'].values, 
#                     fill='tonexty', fillcolor='lightgray', line=dict(color='lightgray'), name='HDI - High 90%',
#                       row=topcity[:-3][1::2].index(i)+1, col=2)
#     fig000.add_scatter(x=results_ncr[results_ncr.CityMunRes == i]['Low_90'].index, 
#                     y=results_ncr[results_ncr.CityMunRes == i]['Low_90'].values, 
#                     fill='tonexty', fillcolor='lightgray',  line=dict(color='lightgray'), name='HDI - Low 90%',
#                       row=topcity[:-3][1::2].index(i)+1, col=2)
#     fig000.add_scatter(y=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].values, 
#                     x=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index, 
#                     line=dict(color='blue'), name='R(t)', 
#                        row=topcity[:-3][1::2].index(i)+1, col=2)
#     fig000.add_scatter(y=[1]*results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index.shape[0],
#                        x=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index, 
#                        line=dict(color='orange', dash='dash'), name='R(t)', 
#                        row=topcity[:-3][1::2].index(i)+1, col=2)

# fig000.update_xaxes(tickfont_size=10, title="Date", titlefont_size=10)
# fig000.update_yaxes(tickfont_size=10, title="R(t)", titlefont_size=10)
# fig000.update_layout(title=dict(x=0.5, text="Estimated R(t) by NCR Cities"), 
#                      showlegend=False, height=3200,
#                      legend=dict(orientation="h", x=-0.05, y=-0.4, yanchor="bottom"), 
#                      hoverlabel=dict(font_size=18))
    
# fig000.show()

# print("Note: Malabon, Navatos and Pateros can't be estimated well due to small number of new cases")

Log-Ratio: New vs Total Cases

In [90]:
cc2 = cc[31:].reset_index(drop=True) # March 1, 2020
cc2['Date'] = cc2['Date'].dt.date
cc2['Count_rmean_7days'] = cc2.Count.rolling(7).mean()
cc2['Cum_count_rmean_7days'] = cc2.Count.cumsum().rolling(7).mean()

fig = px.line(cc2, x='Cum_count_rmean_7days', y='Count_rmean_7days',
              hover_name='Date', title='New Cases vs Total Cases (7-day Rolling Average)',
              log_x=True, log_y=True)
fig.update_xaxes(title="log(Total Confirmed Cases)")
fig.update_yaxes(title="log(New Confirmed Cases)")

fig.show()

The growth started to slow down during the first week of April & currently moving (fluctuating) sideways. Yes, it started to plateau but what we need is a downtrend / sudden drop to achieve a "flattening curve".

Doubling Time

Doubling time is the defined as the length of time for the present value of variable to double given the prevailing rate of increase. It is based on the compound accumulation model. Here's the formula for doubling time ${d_{t,y}}$:

${d_{t,y}}$ = $\frac{ln (2)}{ln(1+P_{t,y})}$

where $P_{t,y}$ is the percent change of a value (i.e. Covid-19 total cases) at any time ${t}$

Source: PJ Cayton and JG Sarmiento

In [91]:
# "Epidemic doubling times characterize the sequence of intervals at which the cumulative incidence doubles (3). If an epidemic is growing exponentially with a constant growth rate ${r}$, the doubling time remains constant and equals $\frac{ln 2}{r}$. An increase in the doubling time indicates a slowdown in transmission if the underlying reporting rate remains unchanged." 
# Source: https://wwwnc.cdc.gov/eid/article/26/8/20-0219_article

# To calculate the doubling time for a population (e.g. COVID-19 cases), use the formula = $\frac{x ln(2)}{ln \frac{y}{z}$, where

# - x is the time that has passed since you started measuring. For example, if the number of cases went from 500 on day 0 to 1000 on day 2, x is 2.
# - y is the number of cases on day x, e.g 1000 on day 2.
# - z is the number of cases on day 0, e.g. 500."

# Source: https://blog.datawrapper.de/weekly-chart-coronavirus-doublingtimes/
In [92]:
# # version1
# c2 = c1.copy()
# c2['doubling_time1'] = np.log(2) / (c2['%Change']/100)
# c2['doubling_time2'] = np.log(2) / (c2['trend']/100)

# # version2
# c2 = c1.copy()
# c2['doubling_time1'] = np.log10(2) / (c2['%Change']/100)
# c2['doubling_time2'] = np.log10(2) / (c2['trend']/100)

# version3
c2 = c1.copy()
c2['doubling_time1'] = np.log(2) / np.log(1 + (c2['%Change']/100))
c2['doubling_time2'] = np.log(2) / np.log(1 + (c2['trend']/100))

# version3.1
# c2['doubling_time3'] = c2['doubling_time1'].rolling(7).mean()
# cyclex, trendx = sm.tsa.filters.hpfilter(c2['doubling_time1'], 7)
# c2['doubling_time4'] = trendx

c2['Days'] = 'Doubling Time'
c2['Trend_name1'] = 'Trend (based on HP Filter)'
fig_trend1 = px.line(c2, x='Date', y='doubling_time2', color='Trend_name1', 
                     color_discrete_sequence=["cadetblue"], 
                     labels={'doubling_time2': 'Days-To-Double'})
# c2['Trend_name2'] = 'Trend (based on 7-day rolling mean)'
# fig_trend2 = px.line(c2, x='Date', y='doubling_time4', color='Trend_name2', 
#                      color_discrete_sequence=["cadetblue"])

fig2 = px.line(c2, x='Date', y='doubling_time1', color='Days',
               labels={'doubling_time1': 'Days-To-Double'},
               color_discrete_sequence=["lightblue"])

fig2.add_trace(fig_trend1.data[0].update(line={'dash': 'dash'}))
# fig2.add_trace(fig_trend2.data[0].update(line={'dash': 'dashdot'}))

fig2.update_yaxes(title="Days-to-Double", showgrid=True, tickfont_size=15)
fig2.update_layout(title=dict(x=0.5, text="Doubling Time in Confirmed Cases for PH"),
                   xaxis=dict(tickmode='array', #tick0=4, dtick=0, 
                              tickfont_size=9, title="Date", showgrid=False),
                   legend=dict(orientation="v", title="",
                              x=0.1, y=0.85, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))
fig2.update_layout(shapes=[dict(type='line', 
                                yref='y', y0=30, y1=30, 
                                xref='paper', x0=0, x1=1, 
                                line=dict(color="orange", width=1))])
fig2.show()
In [93]:
d2 = d1.copy()
d2['doubling_time1'] = np.log(2) / (d2['%Change']/100)
d2['doubling_time2'] = np.log(2) / (d2['trend']/100)

d2['Days'] = 'Doubling Time'
d2['Trend_name1'] = 'Trend (based on HP Filter)'
fig_trend = px.line(d2, x='Date', y='doubling_time2', color='Trend_name1', 
                    labels={'doubling_time2': 'Days-To-Double'},
                    color_discrete_sequence=["tomato"])

fig2 = px.line(d2, x='Date', y='doubling_time1', color='Days',
               labels={'doubling_time1': 'Days-To-Double'},
               color_discrete_sequence=["lightpink"])
fig2.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}))
fig2.update_yaxes(title="Days-to-Double", showgrid=True, tickfont_size=15)
fig2.update_layout(title=dict(x=0.5, text="Doubling Time in Death Cases for PH"),
                   xaxis=dict(tickmode='array', #tick0=4, dtick=0, 
                              tickfont_size=9, title="Date", showgrid=False),
                   legend=dict(orientation="v", title="",
                              x=0.05, y=0.85, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

fig2.update_layout(shapes=[dict(type= 'line', 
                                yref= 'y', y0=30, y1=30, 
                                xref= 'paper', x0=0, x1=1, 
                                line=dict(color="orange", width=1))])
fig2.show()
In [94]:
# version3
r2 = r1.copy()
r2['doubling_time1'] = np.log(2) / np.log(1 + (r2['%Change']/100))
r2['doubling_time2'] = np.log(2) /  np.log(1 + (r2['trend']/100))

r2['Days'] = 'Doubling Time'
r2['Trend_name1'] = 'Trend (based on HP Filter)'
fig_trend = px.line(r2, x='Date', y='doubling_time2', color='Trend_name1',
                    labels={'doubling_time2': 'Days-To-Double'},
                    color_discrete_sequence=["limegreen"])

fig2 = px.line(r2, x='Date', y='doubling_time1', color='Days',
               labels={'doubling_time1': 'Days-To-Double'},
               color_discrete_sequence=["lightgreen"])
fig2.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}))
fig2.update_yaxes(title="Days-to-Double", showgrid=True, tickfont_size=15)
fig2.update_layout(title=dict(x=0.5, text="Doubling Time in Recovered Cases for PH"),
                   xaxis=dict(tickmode='array', #tick0=4, dtick=0, 
                              tickfont_size=9, title="Date", showgrid=False),
                   legend=dict(orientation="v", title="",
                              x=0.05, y=0.85, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

# fig2.update_layout(shapes=[dict(type= 'line', 
#                                 yref= 'y', y0=30, y1=30, 
#                                 xref= 'paper', x0=0, x1=1, 
#                                 line=dict(color="orange", width=1))])
fig2.show()

PH vs ASEAN Countries

Data Source: https://www.worldometers.info/coronavirus/

In [95]:
print("WORLDOMETER Last download: ", pd.to_datetime('today').strftime("%b %d, %Y %I:%M:%S"))
WORLDOMETER Last download:  May 28, 2020 05:58:14
In [96]:
url = 'https://www.worldometers.info/coronavirus/'
html = requests.get(url).content

df_list = pd.read_html(html)
df = df_list[-1].iloc[1:-1,1:]
In [97]:
def human_format(num):
    num = float('{:.3g}'.format(num))
    magnitude = 0
    while abs(num) >= 1000:
        magnitude += 1
        num /= 1000.0
    return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), ['', 'K', 'M', 'B', 'T'][magnitude])

asean = ['Philippines', 'Indonesia', 'Malaysia', 'Singapore', 'Thailand', 
     'Brunei', 'Laos', 'Cambodia', 'Vietnam', 'Burma', 'Timor Leste']

df_asean = df[df['Country,Other'].isin(asean)].reset_index(drop=True)
df_asean['Pop'] = df_asean['Population'].apply(lambda x : human_format(x))

df_asean['%Active'] = round(df_asean['ActiveCases'] / df_asean['TotalCases']*100, 1)
df_asean['%Death'] = round(df_asean['TotalDeaths'] / df_asean['TotalCases']*100, 1)
df_asean['%Recovered'] = round(df_asean['TotalRecovered'] / df_asean['TotalCases']*100,1)
In [98]:
figq = px.bar(df_asean[::-1], x='TotalCases', y='Country,Other',
              orientation='h', text='TotalCases', hover_data=['Pop'],
              labels={'Country,Other': 'Country'}, #hover_data=['ActiveCases'],
              color_discrete_sequence=[px.colors.qualitative.Plotly[0]])

figq.update_traces(textposition='outside', textfont_size=12)
figq.update_yaxes(tickfont_size=15)
figq.update_xaxes(range=(0, df_asean['TotalCases'].max()*1.1))
figq.update_layout(xaxis=dict(tickfont_size=9, title='Number of Cases'),
                   yaxis=dict(title=None),
                   title=dict(x=0.5, text="Covid-19 Confirmed Cases in ASEAN Region"),
                   # legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

figq.show()

#############################################################################
#############################################################################
#############################################################################

figA = px.bar(df_asean[::-1], x='%Active', y='Country,Other',
              orientation='h', text='%Active', 
              labels={'Country,Other': 'Country'}, hover_data=['ActiveCases'],
              color_discrete_sequence=[px.colors.qualitative.Plotly[0]])

figA.update_traces(textposition='outside', textfont_size=12)
figA.update_layout(xaxis=dict(tickfont_size=9),
                  #title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
                  #legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
# figA.show()

figB = px.bar(df_asean[::-1], x='%Death', y='Country,Other',
              orientation='h', text='%Death', 
              labels={'Country,Other': 'Country'}, hover_data=['TotalDeaths'],
              color_discrete_sequence=[px.colors.qualitative.Plotly[1]])

figB.update_traces(textposition='outside', textfont_size=12)
figB.update_layout(xaxis=dict(tickfont_size=9),
                  #title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
                  #legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
# figA.show()

figC = px.bar(df_asean[::-1], x='%Recovered', y='Country,Other',
              orientation='h', text='%Recovered', 
              labels={'Country,Other': 'Country'}, hover_data=['TotalRecovered'],
              color_discrete_sequence=[px.colors.qualitative.Plotly[2]])

figC.update_traces(textposition='outside', textfont_size=12)
figC.update_layout(xaxis=dict(tickfont_size=9),
                  #title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
                  #legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
# figC.show()

##############################################################################

fig = px.bar(df_asean[::-1], x='Tot\xa0Cases/1M pop', y='Country,Other',
             orientation='h', text='Tot\xa0Cases/1M pop', labels={'Country,Other': 'Country'}, 
             hover_name='Tot\xa0Cases/1M pop', hover_data=['TotalCases', 'Pop'], 
             color_discrete_sequence=[px.colors.qualitative.Plotly[0]])

fig.update_traces(textposition='outside', textfont_size=12)
fig.update_layout(xaxis=dict(tickfont_size=9),
                  #title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
                  #legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
# fig.show()

fig1 = px.bar(df_asean[::-1], x='Deaths/1M pop', y='Country,Other', 
              orientation='h', text='Deaths/1M pop', labels={'Country,Other': 'Country'}, 
              hover_name='Deaths/1M pop', hover_data=['TotalDeaths', 'Pop'], 
              color_discrete_sequence=[px.colors.qualitative.Plotly[1]])

fig1.update_traces(textposition='outside', textfont_size=12)
fig1.update_layout(xaxis=dict(tickfont_size=9),
                  #title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
                  #legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
#fig1.show()

fig2 = px.bar(df_asean[::-1], x='Tests/ 1M pop', y='Country,Other', 
              orientation='h', text='Tests/ 1M pop', labels={'Country,Other': 'Country'},
              hover_name='Tests/ 1M pop', hover_data=['TotalTests', 'Pop'], 
              color_discrete_sequence=[px.colors.qualitative.Plotly[2]])

fig2.update_traces(textposition='outside', textfont_size=12)
fig2.update_layout(xaxis=dict(tickfont_size=9),
                  #title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
                  #legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
#fig2.show()

##############################################################################
##############################################################################
##############################################################################

fig000 = make_subplots(rows=2, cols=3, subplot_titles=("%Active Cases", "%Deaths", "%Recovered", 
                                                       "Cases per 1M Pop'n", "Deaths per 1M Pop'n", "Test per 1M Pop'n"))

# FigA
fig000.add_trace(figA.data[0], row=1, col=1)
fig000.update_yaxes(tickfont_size=15, row=1, col=1)
fig000.update_xaxes(range=(0, df_asean['%Active'].max()*1.25), title='Percent(%)', row=1, col=1)
# fig000.update_xaxes(range=(0, 120), row=1, col=1)

# FigB
fig000.add_trace(figB.data[0], row=1, col=2)
fig000.update_yaxes(showticklabels=False, row=1, col=2)
fig000.update_xaxes(range=(0, df_asean['%Death'].max()*1.5), title='Percent(%)', row=1, col=2)
# fig000.update_xaxes(range=(0, 120), row=1, col=2)

# FigC
fig000.add_trace(figC.data[0], row=1, col=3)
fig000.update_yaxes(showticklabels=False, row=1, col=3)
fig000.update_xaxes(range=(0, df_asean['%Recovered'].max()*1.25), title='Percent(%)', row=1, col=3)
# fig000.update_xaxes(range=(0, 120), row=1, col=3)

##############################################################################
# Fig
fig000.add_trace(fig.data[0], row=2, col=1)
fig000.update_yaxes(tickfont_size=15, row=2, col=1)
fig000.update_xaxes(range=(0, df_asean['Tot\xa0Cases/1M pop'].max()*1.25), title='Count', row=2, col=1)

# Fig1
fig000.add_trace(fig1.data[0], row=2, col=2)
fig000.update_yaxes(showticklabels=False, row=2, col=2)
fig000.update_xaxes(range=(0, df_asean['Deaths/1M pop'].max()*1.25), title='Count', row=2, col=2)

# Fig2
fig000.add_trace(fig2.data[0], row=2, col=3)
fig000.update_yaxes(showticklabels=False, row=2, col=3)
fig000.update_xaxes(range=(0, df_asean['Tests/ 1M pop'].max()*1.25), title='Count', row=2, col=3)

fig000.update_layout(hoverlabel=dict(font_size=20), height=1000)

fig000.show()

Epidepic Modeling using SIR

SIR model is a kind of compartmental model describing the dynamics of infectious disease. You may wonder why it is called the “compartmental model.” The model divides the population into compartments. Each compartment is expected to have the same characteristics. SIR represents the three compartments segmented by the model.

  1. Susceptible is a group of people who are vulnerable to exposure with infectious people. They can be patient when the infection happens.
  2. The group of Infectious represents the infected people. They can pass the disease to susceptible people and can be recovered in a specific period.
  3. Recovered people get immunity so that they are not susceptible to the same illness anymore. SIR model is a framework describing how the number of people in each group can change over time.

Source: https://www.lewuathe.com/covid-19-dynamics-with-sir-model.html

In [99]:
#!/usr/bin/python
import numpy as np
import pandas as pd
from csv import reader
from csv import writer
from scipy.integrate import solve_ivp
from scipy.optimize import minimize
import matplotlib.pyplot as plt
from datetime import timedelta, datetime
import argparse
import sys
import json
import ssl
import urllib.request
In [100]:
# DATA FROM JHU SSE

df_confirmed = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
df_deaths = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
df_recovered = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')

country_df_confirmed = df_confirmed[df_confirmed['Country/Region'] == 'Philippines']
country_df_deaths = df_deaths[df_deaths['Country/Region'] == 'Philippines']
country_df_recovered = df_recovered[df_recovered['Country/Region'] == 'Philippines']
In [101]:
start_date = '2/1/20' # First Case

recovered = country_df_recovered.iloc[0].loc[start_date:]
death = country_df_deaths.iloc[0].loc[start_date:]
confirmed = country_df_confirmed.iloc[0].loc[start_date:]
data = confirmed - recovered - death

First, let's set the initial parameters. For the sake of this modeling effort, we will just play with estimates.
Let's say $S_0$ = 100000 which corresponds to the initial susceptible. Meanwhile, we will set $I_0$ = 1 (initial infected) and $R_0$ = 0 (initial recovered), same as the PH statistics we have as of January 30, 2020.

In [102]:
s_0 = 100000 # Initial Susceptible; Setting the whole population in the country is not realistic for sure.
i_0 = 1 # country_df_confirmed.iloc[0].loc[start_date:][0] # 1, Initial Infected
r_0 = 0 # country_df_recovered.iloc[0].loc[start_date:][0] # 0, Initial Recovered
dayforecast = 60 # next 30 days

values = data.index
current = datetime.strptime(data.index[-1], '%m/%d/%y')
new_size = len(values)+dayforecast
In [103]:
print('Actual data as of:', values[-1])
Actual data as of: 5/27/20
In [104]:
while len(values) < new_size:
    current = current + timedelta(days=1)
    values = np.append(values, datetime.strftime(current, '%m/%d/%y'))
    
new_index = values
size = len(new_index)
print('Forecast until', '(+'+str(dayforecast)+' days):', new_index[-1])
Forecast until (+60 days): 07/26/20
In [105]:
# loss Function based on root MSE
def loss(point, data, recovered, s_0, i_0, r_0):
    size = len(data)
    beta, gamma = point
    
    def SIR(t, y):
        S = y[0]
        I = y[1]
        R = y[2]
        return [-beta*S*I, beta*S*I-gamma*I, gamma*I]
    
    solution = solve_ivp(SIR, [0, size], [s_0, i_0, r_0], t_eval=np.arange(0, size, 1), vectorized=True)
    l1 = np.sqrt(np.mean((solution.y[1] - data)**2))
    l2 = np.sqrt(np.mean((solution.y[2] - recovered)**2))
    alpha = 0.1
    return alpha * l1 + (1 - alpha) * l2

# SIR Model
def SIR(t, y):
    S = y[0]
    I = y[1]
    R = y[2]
    return [-beta*S*I, beta*S*I-gamma*I, gamma*I]

Next, find the values of beta (β) and gamma (γ) that give the smallest residual sum of squares (RSS), which represents the best fit to the data. β is the transmissivity rate or the parameter controlling how much the disease can be transmitted through exposure. It is determined by the chance of contact and the probability of disease transmission. γ is the recovery rate or the parameter expressing how much the disease can be recovered in a specific period.

In [106]:
# Optimizate to estimate the beta and gamma fitting the given confirmed cases
optimal = minimize(loss, [0.001, 0.001], args=(data, recovered, s_0, i_0, r_0), 
                   method='L-BFGS-B', bounds=[(0.00000001, 0.4), (0.00000001, 0.4)])
print(optimal)
      fun: 582.9738391310784
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 1.29773984e+07, -5.04369609e+01])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 234
      nit: 11
   status: 0
  success: True
        x: array([1.05462258e-06, 1.84924871e-02])
In [107]:
beta, gamma = optimal.x
print('β: ', beta)
print('γ: ', gamma)
β:  1.0546225784657421e-06
γ:  0.018492487104844316
In [108]:
# Predict Future Values Based on SIR Model
new_index = values
size = len(new_index)
extended_actual = np.concatenate((data.values, [None] * (size - len(data.values))))
extended_recovered = np.concatenate((recovered.values, [None] * (size - len(recovered.values))))
extended_death = np.concatenate((death.values, [None] * (size - len(death.values))))
prediction = solve_ivp(SIR, [0, size], [s_0, i_0, r_0], t_eval=np.arange(0, size, 1))
In [109]:
# Plot the graph
df = pd.DataFrame({'Active Cases (Infected)': extended_actual, 'Recovered Cases': extended_recovered, 'Death Cases': extended_death, 
                   'Susceptible': prediction.y[0], 'Infected': prediction.y[1], 'Recovered': prediction.y[2]}, index=new_index)
df.index = pd.to_datetime(df.index)


df['AC'] = df.columns[0]
df['RC'] = df.columns[1]
df['DC'] = df.columns[2]
df['S'] = df.columns[3]
df['I'] = df.columns[4]
df['R'] = df.columns[5]

fig = px.line(df, x=df.index, y=df.columns[0], color='AC')
fig.add_trace(px.line(df, x=df.index, y=df.columns[1], color='RC', color_discrete_sequence=['green']).data[0])
fig.add_trace(px.line(df, x=df.index, y=df.columns[2], color='DC', color_discrete_sequence=['red']).data[0])
# Forecast
fig.add_trace(px.line(df, x=df.index, y=df.columns[3], color='S', color_discrete_sequence=['orange']).data[0].update(line={'dash': 'dash'}))
fig.add_trace(px.line(df, x=df.index, y=df.columns[4], color='I', color_discrete_sequence=['blue']).data[0].update(line={'dash': 'dash'}))
fig.add_trace(px.line(df, x=df.index, y=df.columns[5], color='R', color_discrete_sequence=['green']).data[0].update(line={'dash': 'dash'}))

fig.update_layout(title=dict(x=0.5, text="SIR Model"),
                  xaxis=dict(tickmode='array', tickfont_size=12, title="Date", showgrid=False),
                  yaxis=dict(title='Cases'),
                  legend=dict(orientation="v", title="", x=0.02, y=0.1, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
fig.show()

Logistic Growth Modeling

In a pandemic, the number of cases is expected to have an exponential growth but eventually, it will reach a certain limit given the efforts of the government / health sectors in preventing the spread of the disease. Considering this, we can imagine an "S" curve or in mathematical terms, it's a logistic curve. Therefore, a good way to model is to fit our data in a logistic curve to get better forecast. Thankfully, there is a Python library that we can use.

Prophet is a forecasting procedure developed and released by Facebook’s Core Data Science team. Aside from the typical linear growth time series modeling, it can also handle logistic growth.

In [110]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
import pickle
import math
import scipy.optimize as optimize
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

import logging
logging.getLogger('fbprophet').setLevel(logging.WARNING)

Confirmed Cases

In [111]:
#cases_daily = pd.read_excel("./data/cases_daily.xlsx")
cases_daily = confirmed.diff().reset_index().rename({'index': 'DateRepConf', 182: 'CaseCode'}, axis=1)

data = cases_daily.set_index('DateRepConf')
data.index = pd.to_datetime(data.index) ######################################

idx = pd.date_range(data.index[0].date(), data.index[-1].date())
data.index = pd.DatetimeIndex(data.index) 
data = data.reindex(idx, fill_value=0)

data=data.cumsum()
data = data.reset_index(drop=False)
data.columns = ['Timestep', 'Total Cases']
data['Total Cases'] = data['Total Cases'].fillna(0).astype('int64') ##########

The challenge now is to define the "maximum carrying capacity" as required by the logistic growth function of the Prophet tool. Supposedly, this is a known variable but in our case, we really don't know our ceiling (for the 1st wave at least). To make a logical way to define this, let's derive the capacity based on the smallest mean square error (MSE) from the last 30 days of available data. Meaning, we will compute the best fit given a certain range of capacity or ceiling.

In [112]:
lalala = pd.DataFrame()

for cap in list(range(round(data['Total Cases'].tail(1).values[0],-2), 21000, 500)):
    df = data.copy()
    df['cap'] = cap
    df.columns = ['ds', 'y', 'cap']
    # df.columns = ['ds', 'y']
    df

    # STEP 1: Model Fitting
    m = Prophet(growth="logistic", interval_width=0.90)
    m.fit(df)
    
    # STEP 2: Specify the number of days to extend into the future.
    forecast_period = 14 # next two weeks
    future = m.make_future_dataframe(periods=forecast_period)
    future['cap'] = df['cap'].iloc[0]

    # STEP 3: Predict the future
    forecast = m.predict(future)
    
    from sklearn.metrics import mean_squared_error 
  
    # Actual data (Y) on Last 30 days
    Y_true = data['Total Cases'][-7:].tolist()  # Y_true = Y (original values) 

    # Calculated values (Yhat) on the last 30 days 
    Y_pred = forecast['yhat'][:-forecast_period][-7:].tolist()  # Y_pred = Y' 

    # Calculation of Mean Squared Error (MSE) 
    mse = mean_squared_error(Y_true,Y_pred) 
    
    lalala = lalala.append(pd.DataFrame([[cap, mse]], columns=['Capacity','Mean Square Error']))
In [113]:
fig = px.line(lalala, x='Capacity', y='Mean Square Error')
smallest_mse = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
fig.add_annotation(dict(xref="x", yref="y", showarrow=True,
                            arrowhead=2, xanchor='center', yanchor='bottom',
                            ax=0, ay=-100,
                            x=smallest_mse, y=lalala['Mean Square Error'].min(),
                            text="Smallest MSE : <br> Capacity = " + str(smallest_mse), font_size=18, font_color = 'blue'))
fig.update_layout(title="MSE (Based on the last 30 days) for Each Capacity")
fig.show()

Now, we can proceed with the modeling given the capacity computed above. We will also add our forecast based on this on the next 30 days. To consider the uncertainties, we will be defining an interval width at 90% confidence.

In [114]:
# STEP 0: Capacity computed based on MSE
df = data.copy()
df['cap'] = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
df.columns = ['ds', 'y', 'cap']
# df.columns = ['ds', 'y']
df

# STEP 1: Model Fitting
m = Prophet(growth="logistic", interval_width=0.90)
m.fit(df)

# STEP 2: Specify the number of days to extend into the future.
future = m.make_future_dataframe(periods=30)
future['cap'] = df['cap'].iloc[0]

# STEP 3: Predict the future
forecast = m.predict(future)
In [115]:
from fbprophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()

fig = plot_plotly(m, forecast, xlabel="Date", ylabel="Case Count", figsize=(1000, 700))
fig.update_layout(title="PH Forecast (Next 30 days) on Total Confirmed Cases", hoverlabel=dict(font_size=20))
fig.update_traces(line=dict(color='blue'))
fig.show()
In [116]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

metric_df = forecast.set_index('ds')[['yhat']].join(df.set_index('ds').y).reset_index()
metric_df.dropna(inplace=True)

print("------------ Accuracy ------------")
print("R-squared: ", round(r2_score(metric_df.y, metric_df.yhat),4)*100, '%')
print("Mean Squared Error: ", mean_squared_error(metric_df.y, metric_df.yhat))
print("Mean Absolute Error: ", mean_absolute_error(metric_df.y, metric_df.yhat))
------------ Accuracy ------------
R-squared:  99.08 %
Mean Squared Error:  218094.05162581877
Mean Absolute Error:  381.3835650869009

Deaths

In [117]:
deaths_daily = pd.read_excel("./data/deaths_daily.xlsx")

data = deaths_daily.set_index('DateRepRem')
data.index = pd.to_datetime(data.index) ######################################

# idx = pd.date_range(data.index[0].date(), data.index[-1].date())
idx = pd.date_range(data.index[0].date(), data.index[-1].date())
data.index = pd.DatetimeIndex(data.index) 
data = data.reindex(idx, fill_value=0)

data=data.cumsum()
data = data.reset_index(drop=False)
data.columns = ['Timestep', 'Total Cases']
data['Total Cases'] = data['Total Cases'].fillna(0).astype('int64') ##########
In [118]:
lalala = pd.DataFrame()

for cap in list(range(round(data['Total Cases'].tail(1).values[0],-2), 1500, 10)):
    df = data.copy()
    df['cap'] = cap
    df.columns = ['ds', 'y', 'cap']
    # df.columns = ['ds', 'y']
    df

    # STEP 1: Model Fitting
    m = Prophet(growth="logistic", interval_width=0.90)
    m.fit(df)

    # STEP 2: Specify the number of days to extend into the future.
    future = m.make_future_dataframe(periods=14)
    future['cap'] = df['cap'].iloc[0]

    # STEP 3: Predict the future
    forecast = m.predict(future)
    
    from sklearn.metrics import mean_squared_error 
  
    # Actual data (Y) on Last 30 days
    Y_true = data['Total Cases'][-30:].tolist()  # Y_true = Y (original values) 

    # Calculated values (Yhat) on the last 30 days 
    Y_pred = forecast['yhat'][:-14][-30:].tolist()  # Y_pred = Y' 

    # Calculation of Mean Squared Error (MSE) 
    mse = mean_squared_error(Y_true,Y_pred) 
    
    lalala = lalala.append(pd.DataFrame([[cap, mse]], columns=['Capacity','Mean Square Error']))
In [119]:
# fig = px.line(lalala, x='Capacity', y='Mean Square Error')
# smallest_mse = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True,
#                             arrowhead=2, xanchor='center', yanchor='bottom',
#                             ax=0, ay=-100,
#                             x=smallest_mse, y=lalala['Mean Square Error'].min(),
#                             text="Smallest MSE : <br> Capacity = " + str(smallest_mse), font_size=18, font_color = 'blue'))
# fig.update_layout(title="MSE (Based on the last 30 days) for Each Capacity")
# fig.show()
In [120]:
# STEP 0: Capacity computed based on MSE
df = data.copy()
df['cap'] = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
df.columns = ['ds', 'y', 'cap']
# df.columns = ['ds', 'y']
df

# STEP 1: Model Fitting
m = Prophet(growth="logistic", interval_width=0.90)
m.fit(df)

# STEP 2: Specify the number of days to extend into the future.
future = m.make_future_dataframe(periods=30)
future['cap'] = df['cap'].iloc[0]

# STEP 3: Predict the future
forecast = m.predict(future)
In [121]:
from fbprophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()

fig = plot_plotly(m, forecast, xlabel="Date", ylabel="Case Count", figsize=(1000, 700))
fig.update_layout(title="PH Forecast (Next 30 days) on Death Cases", hoverlabel=dict(font_size=20))
fig.update_traces(line=dict(color='red'))
fig.show()
In [122]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

metric_df = forecast.set_index('ds')[['yhat']].join(df.set_index('ds').y).reset_index()
metric_df.dropna(inplace=True)

print("------------ Accuracy ------------")
print("R-squared: ", round(r2_score(metric_df.y, metric_df.yhat),4)*100, '%')
print("Mean Squared Error: ", mean_squared_error(metric_df.y, metric_df.yhat))
print("Mean Absolute Error: ", mean_absolute_error(metric_df.y, metric_df.yhat))
------------ Accuracy ------------
R-squared:  99.48 %
Mean Squared Error:  502.5349268997346
Mean Absolute Error:  17.525537455347994

Recoveries

In [123]:
recoveries_daily = pd.read_excel("./data/recoveries_daily.xlsx")

data = recoveries_daily.set_index('DateRepRem')
data.index = pd.to_datetime(data.index) ######################################

# idx = pd.date_range(data.index[0].date(), data.index[-1].date())
idx = pd.date_range(data.index[0].date(), data.index[-1].date())
data.index = pd.DatetimeIndex(data.index) 
data = data.reindex(idx, fill_value=0)

data=data.cumsum()
data = data.reset_index(drop=False)
data.columns = ['Timestep', 'Total Cases']
data['Total Cases'] = data['Total Cases'].fillna(0).astype('int64') ##########
In [124]:
lalala = pd.DataFrame()

for cap in list(range(round(data['Total Cases'].tail(1).values[0],-2), 10000, 100)):
    df = data.copy()
    df['cap'] = cap
    df.columns = ['ds', 'y', 'cap']
    # df.columns = ['ds', 'y']
    df

    # STEP 1: Model Fitting
    m = Prophet(growth="logistic", interval_width=0.90)
    m.fit(df)

    # STEP 2: Specify the number of days to extend into the future.
    future = m.make_future_dataframe(periods=14)
    future['cap'] = df['cap'].iloc[0]

    # STEP 3: Predict the future
    forecast = m.predict(future)
    
    from sklearn.metrics import mean_squared_error 
  
    # Actual data (Y) on Last 30 days
    Y_true = data['Total Cases'][-30:].tolist()  # Y_true = Y (original values) 

    # Calculated values (Yhat) on the last 30 days 
    Y_pred = forecast['yhat'][:-14][-30:].tolist()  # Y_pred = Y' 

    # Calculation of Mean Squared Error (MSE) 
    mse = mean_squared_error(Y_true,Y_pred) 
    
    lalala = lalala.append(pd.DataFrame([[cap, mse]], columns=['Capacity','Mean Square Error']))
In [125]:
# fig = px.line(lalala, x='Capacity', y='Mean Square Error')
# smallest_mse = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True,
#                             arrowhead=2, xanchor='center', yanchor='bottom',
#                             ax=0, ay=-100,
#                             x=smallest_mse, y=lalala['Mean Square Error'].min(),
#                             text="Smallest MSE : <br> Capacity = " + str(smallest_mse), font_size=18, font_color = 'blue'))
# fig.update_layout(title="MSE (Based on the last 30 days) for Each Capacity")
# fig.show()
In [126]:
# STEP 0: Capacity computed based on MSE
df = data.copy()
df['cap'] = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
df.columns = ['ds', 'y', 'cap']
# df.columns = ['ds', 'y']
df

# STEP 1: Model Fitting
m = Prophet(growth="logistic", interval_width=0.90)
m.fit(df)

# STEP 2: Specify the number of days to extend into the future.
future = m.make_future_dataframe(periods=30)
future['cap'] = df['cap'].iloc[0]

# STEP 3: Predict the future
forecast = m.predict(future)
In [127]:
from fbprophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()

fig = plot_plotly(m, forecast, xlabel="Date", ylabel="Case Count", figsize=(1000, 700))
fig.update_layout(title="PH Forecast (Next 30 days) on Recovered Cases", hoverlabel=dict(font_size=20))
fig.update_traces(line=dict(color='green'))
fig.show()
In [128]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

metric_df = forecast.set_index('ds')[['yhat']].join(df.set_index('ds').y).reset_index()
metric_df.dropna(inplace=True)

print("------------ Accuracy ------------")
print("R-squared: ", round(r2_score(metric_df.y, metric_df.yhat),4)*100, '%')
print("Mean Squared Error: ", mean_squared_error(metric_df.y, metric_df.yhat))
print("Mean Absolute Error: ", mean_absolute_error(metric_df.y, metric_df.yhat))
------------ Accuracy ------------
R-squared:  99.8 %
Mean Squared Error:  1925.7136993775998
Mean Absolute Error:  30.751504278869735